diff --git a/src/gen.cpp b/src/gen.cpp index c88a054..16678ba 100644 --- a/src/gen.cpp +++ b/src/gen.cpp @@ -4,6 +4,7 @@ struct Code : public mcl::Generator { typedef std::map FunctionMap; + typedef std::vector OperandVec; Operand Void; uint32_t unit; uint32_t unit2; @@ -20,6 +21,7 @@ struct Code : public mcl::Generator { FunctionMap mcl_fp_subNCM; FunctionMap mcl_fp_addM; FunctionMap mcl_fp_subM; + FunctionMap mulPvM; Code() : unit(0), unit2(0), bit(0), N(0) { } void gen_mulUU() @@ -480,6 +482,52 @@ struct Code : public mcl::Generator { ret(Void); endFunc(); } + /* + return [px[n-1]:px[n-2]:...:px[0]], i.e. the n operands concatenated into one integer with px[0] in the lowest bits; the result width is the sum of the operand widths (x.bit). Each step zero-extends both sides to the combined width, shifts the new operand left by the width accumulated so far, and ORs it in. px[0] is read unconditionally, so n must be at least 1. + */ + Operand pack(const Operand *px, size_t n) + { + Operand x = px[0]; + for (size_t i = 1; i < n; i++) { + Operand y = px[i]; + size_t shift = x.bit; + size_t size = x.bit + y.bit; + x = zext(x, size); + y = zext(y, size); + y = shl(y, shift); + x = _or(x, y); + } + return x; + } + /* + z = px[0..N-1] * y; emits the function mulPv{bit}x{unit}(px, y) returning an integer of bit+unit bits and records it in mulPvM[bit]. For each i in [0, N) it calls mulPos(px, y, i), keeps the low unit bits of the product in L[i] (trunc) and the extractHigh result in H[i]; the packed L and H values are zero-extended to bit+unit bits and the result is LL plus (HH shifted left by unit). NOTE(review): u2 is declared below but never used in this function. + */ + void gen_mulPv() + { + const int bu = bit + unit; + const int u2 = unit * 2; + resetGlobalIdx(); + Operand z(Int, bu); + Operand px(IntPtr, unit); + Operand y(Int, unit); + std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit); + mulPvM[bit] = Function(name, z, px, y); + beginFunc(mulPvM[bit]); + OperandVec L(N), H(N); + for (uint32_t i = 0; i < N; i++) { + Operand xy = call(mulPos, px, y, makeImm(unit, i)); + L[i] = trunc(xy, unit); + H[i] = call(extractHigh, xy); + } + Operand LL = pack(&L[0], N); + Operand HH = pack(&H[0], N); + LL = zext(LL, bu); + HH = zext(HH, bu); + HH = shl(HH, unit); + z = add(LL, HH); + ret(z); + endFunc(); + } void gen_all() { gen_mcl_fp_addsubNC(true); @@ -497,6 +545,10 @@ struct Code : public mcl::Generator { gen_mcl_fp_addL(); gen_mcl_fp_subL(); } + void gen_mul() + { + gen_mulPv(); + } void setBit(uint32_t bit) { this->bit = bit; @@ -517,6 +569,9 @@ struct Code : public mcl::Generator { gen_all(); gen_short(); gen_long(); + if (i > 64) { + gen_mul(); + } } } }; diff --git a/src/mul.txt b/src/mul.txt index 
b8c4827..11e876a 100644 --- a/src/mul.txt +++ b/src/mul.txt @@ -1,47 +1,5 @@ @define bu = bit + unit @define u2 = unit*2 -define i$(bu) @mulPv$(bit)x$(unit)(i$(unit)* %px, i$(unit) %y) -{ -@for i, 0, N - %x$(i)y = call i$(u2) @mulPos$(unit)x$(unit)(i$(unit) *%px, i$(unit) %y, i$(unit) $(i)) - %L$(i) = trunc i$(u2) %x$(i)y to i$(unit) - %H$(i) = call i$(unit) @extractHigh$(unit)(i$(u2) %x$(i)y) -@endfor - -@for i, 1, N - %L$(i)e = zext i$(unit) %L$(i) to i$(bit) -@if i > 1 - %L$(i)es = shl i$(bit) %L$(i)e, $(unit * i - unit) -@endif -@endfor -@if N > 2 - %Lt1 = or i$(bit) %L1e, %L2es -@endif -@for i, 1, N - 2 - %Lt$(i+1) = or i$(bit) %Lt$(i), %L$(i+2)es -@endfor - - %H0e = zext i$(unit) %H0 to i$(bit) -@for i, 1, N - %H$(i)e = zext i$(unit) %H$(i) to i$(bit) - %H$(i)es = shl i$(bit) %H$(i)e, $(unit * i) -@endfor - %Ht1 = or i$(bit) %H0e, %H1es -@for i, 2, N - %Ht$(i) = or i$(bit) %Ht$(i-1), %H$(i)es -@endfor -@if N == 2 - %t = add i$(bit) %Ht$(N-1), %L1e -@else - %t = add i$(bit) %Ht$(N-1), %Lt$(N-2) -@endif - - %t1 = zext i$(bit) %t to i$(bu) - %t2 = shl i$(bu) %t1, $(unit) - %t3 = zext i$(unit) %L0 to i$(bu) - %t4 = or i$(bu) %t2, %t3 - ret i$(bu) %t4 -} @define u2 = unit*2 define void @mcl_fp_mul_UnitPre$(bit)(i$(bu) *%pz, i$(unit) *%px, i$(unit) %y)