From c03a7cef9562db03d7359070fc4a1f7766a6c3c9 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Sun, 29 May 2016 09:28:32 +0900 Subject: [PATCH] add gen_mcl_fpDbl_mulPre --- src/gen.cpp | 34 ++++++++++++++++++++++++++++++++++ src/mul.txt | 29 ----------------------------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/src/gen.cpp b/src/gen.cpp index 8409a10..e19a992 100644 --- a/src/gen.cpp +++ b/src/gen.cpp @@ -23,6 +23,7 @@ struct Code : public mcl::Generator { FunctionMap mcl_fp_subM; FunctionMap mulPvM; FunctionMap mcl_fp_mul_UnitPreM; + FunctionMap mcl_fpDbl_mulPreM; Code() : unit(0), unit2(0), bit(0), N(0) { } void gen_mulUU() @@ -543,6 +544,38 @@ struct Code : public mcl::Generator { ret(Void); endFunc(); } + void gen_mcl_fpDbl_mulPre() /* Registers "mcl_fpDbl_mulPre" + bit in mcl_fpDbl_mulPreM[bit] and emits its body: a Void function taking three unit-bit integer pointers (pz, px, py) that calls mulPvM[bit] once per round and writes its results through pz. */ + { + const int bu = bit + unit; /* bit width of the pointer type used for the final store */ + resetGlobalIdx(); + Operand pz(IntPtr, unit); + Operand px(IntPtr, unit); + Operand py(IntPtr, unit); + std::string name = "mcl_fpDbl_mulPre" + cybozu::itoa(bit); + mcl_fpDbl_mulPreM[bit] = Function(name, Void, pz, px, py); + beginFunc(mcl_fpDbl_mulPreM[bit]); + Operand y = load(py); /* round 0 reads the value at py itself */ + Operand xy = call(mulPvM[bit], px, y); + store(trunc(xy, unit), pz); /* low unit bits of the first product go to pz */ + Operand t = lshr(xy, unit); /* the remaining high bits carry into the next round */ + Operand z; + for (uint32_t i = 1; i < N; i++) { /* rounds 1..N-1; py and pz each advance by one element per round */ + py = getelementptr(py, makeImm(32, 1)); + y = load(py); + xy = call(mulPvM[bit], px, y); + t = add(t, xy); /* running sum: carried bits plus this round's product */ + z = trunc(t, unit); /* low unit bits of the sum; only stored when i < N - 1 */ + pz = getelementptr(pz, makeImm(32, 1)); + if (i < N - 1) { /* the last round skips the store and the shift, so t keeps the whole sum */ + store(z, pz); + t = lshr(t, unit); + } + } + pz = bitcast(pz, Operand(IntPtr, bu)); /* view the last pz slot as a pointer to a bu-bit integer */ + store(t, pz); /* one wide store of the unshifted last sum. NOTE(review): when N == 1 the loop body never runs, so t is the already-shifted value and this store lands on the slot written above -- confirm this is only generated for N >= 2 */ + ret(Void); + endFunc(); + } void gen_all() { gen_mcl_fp_addsubNC(true); @@ -564,6 +597,7 @@ struct Code : public mcl::Generator { { gen_mulPv(); gen_mcl_fp_mul_UnitPre(); + gen_mcl_fpDbl_mulPre(); } void setBit(uint32_t bit) { diff --git a/src/mul.txt b/src/mul.txt index 85b1524..37a3f93 100644 --- a/src/mul.txt +++ b/src/mul.txt @@ -1,33 +1,4 @@ @define bu = bit + unit -@define u2 = unit*2 - -@define u2 = unit*2 - -define void 
@mcl_fpDbl_mulPre$(bit)(i$(unit)* %pz, i$(unit)* %px, i$(unit)* %py) { - %y0 = load i$(unit)* %py - %xy0 = call i$(bu) @mulPv$(bit)x$(unit)(i$(unit) *%px, i$(unit) %y0) - %z0 = trunc i$(bu) %xy0 to i$(unit) - store i$(unit) %z0, i$(unit)* %pz - %t0 = lshr i$(bu) %xy0, $(unit) - -@for i, 1, N - %py$(i) = getelementptr i$(unit)* %py, i32 $(i) - %y$(i) = load i$(unit)* %py$(i) - %xy$(i) = call i$(bu) @mulPv$(bit)x$(unit)(i$(unit)* %px, i$(unit) %y$(i)) - %a$(i) = add i$(bu) %t$(i-1), %xy$(i) - %z$(i) = trunc i$(bu) %a$(i) to i$(unit) - %pz$(i) = getelementptr i$(unit)* %pz, i32 $(i) - @if i < N - 1 - store i$(unit) %z$(i), i$(unit)* %pz$(i) - %t$(i) = lshr i$(bu) %a$(i), $(unit) - @endif -@endfor - - %pz$(N-1)e = bitcast i$(unit)* %pz$(N-1) to i$(bu)* - store i$(bu) %a$(N-1), i$(bu)* %pz$(N-1)e - - ret void -} define void @mcl_fpDbl_sqrPre$(bit)(i$(unit)* %py, i$(unit)* %px) { %x0 = load i$(unit)* %px