diff --git a/src/gen.cpp b/src/gen.cpp index e19a992..83728a6 100644 --- a/src/gen.cpp +++ b/src/gen.cpp @@ -24,6 +24,7 @@ struct Code : public mcl::Generator { FunctionMap mulPvM; FunctionMap mcl_fp_mul_UnitPreM; FunctionMap mcl_fpDbl_mulPreM; + FunctionMap mcl_fpDbl_sqrPreM; Code() : unit(0), unit2(0), bit(0), N(0) { } void gen_mulUU() @@ -544,16 +545,9 @@ struct Code : public mcl::Generator { ret(Void); endFunc(); } - void gen_mcl_fpDbl_mulPre() + void generic_fpDbl_mul(Operand& pz, Operand& px, Operand& py) { const int bu = bit + unit; - resetGlobalIdx(); - Operand pz(IntPtr, unit); - Operand px(IntPtr, unit); - Operand py(IntPtr, unit); - std::string name = "mcl_fpDbl_mulPre" + cybozu::itoa(bit); - mcl_fpDbl_mulPreM[bit] = Function(name, Void, pz, px, py); - beginFunc(mcl_fpDbl_mulPreM[bit]); Operand y = load(py); Operand xy = call(mulPvM[bit], px, y); store(trunc(xy, unit), pz); @@ -574,6 +568,28 @@ struct Code : public mcl::Generator { pz = bitcast(pz, Operand(IntPtr, bu)); store(t, pz); ret(Void); + } + void gen_mcl_fpDbl_mulPre() + { + resetGlobalIdx(); + Operand pz(IntPtr, unit); + Operand px(IntPtr, unit); + Operand py(IntPtr, unit); + std::string name = "mcl_fpDbl_mulPre" + cybozu::itoa(bit); + mcl_fpDbl_mulPreM[bit] = Function(name, Void, pz, px, py); + beginFunc(mcl_fpDbl_mulPreM[bit]); + generic_fpDbl_mul(pz, px, py); + endFunc(); + } + void gen_mcl_fpDbl_sqrPre() + { + resetGlobalIdx(); + Operand py(IntPtr, unit); + Operand px(IntPtr, unit); + std::string name = "mcl_fpDbl_sqrPre" + cybozu::itoa(bit); + mcl_fpDbl_sqrPreM[bit] = Function(name, Void, py, px); + beginFunc(mcl_fpDbl_sqrPreM[bit]); + generic_fpDbl_mul(py, px, px); endFunc(); } void gen_all() @@ -598,6 +614,7 @@ struct Code : public mcl::Generator { gen_mulPv(); gen_mcl_fp_mul_UnitPre(); gen_mcl_fpDbl_mulPre(); + gen_mcl_fpDbl_sqrPre(); } void setBit(uint32_t bit) { diff --git a/src/mul.txt b/src/mul.txt index 37a3f93..98151c7 100644 --- a/src/mul.txt +++ b/src/mul.txt @@ -1,30 +1,3 @@ -@define bu = bit + unit - -define void @mcl_fpDbl_sqrPre$(bit)(i$(unit)* %py, i$(unit)* %px) { - %x0 = load i$(unit)* %px - %xx0 = call i$(bu) @mulPv$(bit)x$(unit)(i$(unit) *%px, i$(unit) %x0) - %s0 = trunc i$(bu) %xx0 to i$(unit) - store i$(unit) %s0, i$(unit)* %py - %t0 = lshr i$(bu) %xx0, $(unit) - -@for i, 1, N - %px$(i) = getelementptr i$(unit)* %px, i32 $(i) - %x$(i) = load i$(unit)* %px$(i) - %xx$(i) = call i$(bu) @mulPv$(bit)x$(unit)(i$(unit)* %px, i$(unit) %x$(i)) - %a$(i) = add i$(bu) %t$(i-1), %xx$(i) - %s$(i) = trunc i$(bu) %a$(i) to i$(unit) - %py$(i) = getelementptr i$(unit)* %py, i32 $(i) - @if i < N - 1 - store i$(unit) %s$(i), i$(unit)* %py$(i) - %t$(i) = lshr i$(bu) %a$(i), $(unit) - @endif -@endfor - - %py$(N-1)e = bitcast i$(unit)* %py$(N-1) to i$(bu)* - store i$(bu) %a$(N-1), i$(bu)* %py$(N-1)e - - ret void -} @define bu = bit + unit @define bu2 = bit + unit * 2