diff --git a/src/gen.cpp b/src/gen.cpp
index e1caf68..26a5439 100644
--- a/src/gen.cpp
+++ b/src/gen.cpp
@@ -346,6 +346,41 @@ struct Code : public mcl::Generator {
 		ret(Void);
 		endFunc();
 	}
+	void gen_mcl_fpDbl_add() // emit "mcl_fpDbl_add" + bit: store low half of x + y, then high half with p conditionally subtracted
+	{
+		const int bu = bit + unit; // width used for the high half H and for H - p
+		const int b2 = bit * 2; // width of the double-width inputs loaded from px and py
+		const int b2u = b2 + unit; // width x and y are zero-extended to before the add
+		resetGlobalIdx(); // NOTE(review): presumably restarts value numbering for the new function - confirm
+		Operand pz(IntPtr, bit); // output pointer; written at element 0 (low half) and element 1 (high half)
+		Operand px(IntPtr, b2); // first input, b2 bits wide
+		Operand py(IntPtr, b2); // second input, b2 bits wide
+		Operand pp(IntPtr, bit); // pointer to p, bit bits wide
+		std::string name = "mcl_fpDbl_add" + cybozu::itoa(bit); // function name carries the bit width as a suffix
+		Function f(name, Void, pz, px, py, pp);
+		beginFunc(f);
+		Operand x = load(px);
+		Operand y = load(py);
+		x = zext(x, b2u); // widen by unit bits; presumably so the carry out of x + y is kept (assumes unit >= 1)
+		y = zext(y, b2u);
+		Operand t = add(x, y); // x + y = [H:L]
+		Operand L = trunc(t, bit); // L = low bit bits of the sum
+		store(L, pz); // low half is stored unchanged
+
+		Operand H = lshr(t, bit); // drop L; the remaining bu bits are H
+		H = trunc(H, bu);
+		Operand p = load(pp);
+		p = zext(p, bu); // match H's width for the subtraction
+		Operand Hp = sub(H, p); // H - p
+		t = lshr(Hp, bit); // move bit number `bit` of H - p down to bit 0 ...
+		t = trunc(t, 1); // ... and keep only it as the select condition (presumably the borrow, i.e. H < p - confirm)
+		t = select(t, H, Hp); // condition set: keep H; condition clear: take H - p
+		t = trunc(t, bit);
+		pz = getelementptr(pz, makeImm(32, 1)); // advance pz by one element (i32 index 1)
+		store(t, pz); // high half of the result
+		ret(Void);
+		endFunc();
+	}
 	void gen_all()
 	{
 		gen_mcl_fp_addsubNC(true);
@@ -355,6 +390,7 @@ struct Code : public mcl::Generator {
 	{
 		gen_mcl_fp_add();
 		gen_mcl_fp_sub();
+		gen_mcl_fpDbl_add();
 	}
 	void setBit(uint32_t bit)
 	{
diff --git a/src/short.txt b/src/short.txt
index 5e913a7..5f2099d 100644
--- a/src/short.txt
+++ b/src/short.txt
@@ -1,30 +1,6 @@
 @define bu = bit + unit
 @define b2 = bit * 2
 @define b2u = b2 + unit
-define void @mcl_fpDbl_add$(bit)(i$(bit)* %pz, i$(b2)* %px, i$(b2)* %py, i$(bit)* %pp) {
-entry:
-  %x = load i$(b2)* %px
-  %y = load i$(b2)* %py
-  %x1 = zext i$(b2) %x to i$(b2u)
-  %y1 = zext i$(b2) %y to i$(b2u)
-  %t0 = add i$(b2u) %x1, %y1 ; x + y = [H:L]
-  %t1 = trunc i$(b2u) %t0 to i$(bit) ; t1 = L
-  store i$(bit) %t1, i$(bit)* %pz ; store L
-
-  %t2 = lshr i$(b2u) %t0, $(bit)
-  %H = trunc i$(b2u) %t2 to i$(bu)
-
-  %p = load i$(bit)* %pp
-  %p1 = zext i$(bit) %p to i$(bu)
-  %Hp = sub i$(bu) %H, %p1 ; H - p
-  %t3 = lshr i$(bu) %Hp, $(bit)
-  %t4 = trunc i$(bu) %t3 to i1
-  %t5 = select i1 %t4, i$(bu) %H, i$(bu) %Hp
-  %t6 = trunc i$(bu) %t5 to i$(bit)
-  %pz1 = getelementptr i$(bit)* %pz, i32 1
-  store i$(bit) %t6, i$(bit)* %pz1
-  ret void
-}
 define void @mcl_fpDbl_sub$(bit)(i$(bit)* %pz, i$(b2)* %px, i$(b2)* %py, i$(bit)* %pp) {
   %x = load i$(b2)* %px