diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 2d7d58a..f4450f5 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -87,7 +87,7 @@ public: op_.fpDbl_add = fpDbl_addW; op_.fpDbl_sub = fpDbl_subW; op_.fp_mul = fp_mulW; - op_.fp_mod = fp_modW; + op_.fpDbl_mod = fpDbl_modW; /* priority : MCL_USE_XBYAK > MCL_USE_LLVM > none Xbyak > llvm_opt > llvm > gmp @@ -108,7 +108,7 @@ public: if (mode == fp::FP_LLVM_MONT) { op_.fp_mul = fp_montW; op_.fp_sqr = fp_montSqrW; - op_.fp_mod = fp_montRedW; + op_.fpDbl_mod = fp_montRedW; } #if 0 fprintf(stderr, "mode=%d, isMont=%d" @@ -466,9 +466,9 @@ private: op_.fpDbl_subP(z, x, y, op_.p); } // z[N] <- xy[N * 2] % p[N] - static inline void fp_modW(Unit *z, const Unit *xy) + static inline void fpDbl_modW(Unit *z, const Unit *xy) { - op_.fp_modP(z, xy, op_.p); + op_.fpDbl_modP(z, xy, op_.p); } // z[N] <- montRed(xy[N * 2]) static inline void fp_montRedW(Unit *z, const Unit *xy) @@ -478,14 +478,14 @@ private: static inline void fp_mulW(Unit *z, const Unit *x, const Unit *y) { Unit xy[maxSize * 2]; - op_.fp_mulPre(xy, x, y); - fp_modW(z, xy); + op_.fpDbl_mulPre(xy, x, y); + fpDbl_modW(z, xy); } static inline void fp_sqrW(Unit *y, const Unit *x) { Unit xx[maxSize * 2]; - op_.fp_sqrPre(xx, x); - fp_modW(y, xx); + op_.fpDbl_sqrPre(xx, x); + fpDbl_modW(y, xx); } static inline void fp_negW(Unit *y, const Unit *x) { @@ -498,7 +498,7 @@ private: op_.montPU(z, x, y, op_.p, op_.rp); #else Unit xy[maxSize * 2]; - op_.fp_mulPre(xy, x, y); + op_.fpDbl_mulPre(xy, x, y); fp_montRedW(z, xy); #endif } @@ -508,7 +508,7 @@ private: op_.montPU(y, x, x, op_.p, op_.rp); #else Unit xx[maxSize * 2]; - op_.fp_sqrPre(xx, x); + op_.fpDbl_sqrPre(xx, x); fp_montRedW(y, xx); #endif } diff --git a/include/mcl/fp_generator.hpp b/include/mcl/fp_generator.hpp index b96fd84..771ad15 100644 --- a/include/mcl/fp_generator.hpp +++ b/include/mcl/fp_generator.hpp @@ -250,13 +250,13 @@ struct FpGenerator : Xbyak::CodeGenerator { } if (op.N == 3 || op.N == 4) { align(16); - op.fp_mod = getCurr(); - gen_fp_mod(); + op.fpDbl_mod = getCurr(); + gen_fpDbl_mod(); } if (op.N == 3 || op.N == 4) { align(16); - op.fp_mulPre = getCurr(); - gen_fp_mulPre(); + op.fpDbl_mulPre = getCurr(); + gen_fpDbl_mulPre(); } } void gen_addSubNC(bool isAdd, int n) @@ -607,7 +607,7 @@ struct FpGenerator : Xbyak::CodeGenerator { z[2..0] <- montgomery reduction(x[5..0]) @note destroy rax, rdx, t0, ..., t10 */ - void gen_fp_mod3() + void gen_fpDbl_mod3() { StackFrame sf(this, 3, 10 | UseRDX); const Reg64& z = sf.p[0]; @@ -715,7 +715,7 @@ struct FpGenerator : Xbyak::CodeGenerator { @note destroy rax, rdx, t0, ..., t10, xm0, xm1 xm2 if isFullBit_ */ - void gen_fp_mod4() + void gen_fpDbl_mod4() { StackFrame sf(this, 3, 10 | UseRDX); const Reg64& z = sf.p[0]; @@ -831,13 +831,13 @@ struct FpGenerator : Xbyak::CodeGenerator { movq(z, xm0); store_mr(z, Pack(t10, t9, t8, t4)); } - void gen_fp_mod() + void gen_fpDbl_mod() { assert(pn_ == 3 || pn_ == 4); if (pn_ == 3) { - gen_fp_mod3(); + gen_fpDbl_mod3(); } else if (pn_ == 4) { - gen_fp_mod4(); + gen_fpDbl_mod4(); } } void gen_sqr() @@ -1197,7 +1197,7 @@ struct FpGenerator : Xbyak::CodeGenerator { store_mr(pz + 8 * 3, Pack(t7, t8, t3, t2)); mov(ptr [pz + 8 * 7], d); } - void gen_fp_mulPre() + void gen_fpDbl_mulPre() { if (pn_ == 3) { StackFrame sf(this, 3, 10 | UseRDX); diff --git a/include/mcl/fp_proto.hpp b/include/mcl/fp_proto.hpp index 5bcbe8d..865f09b 100644 --- a/include/mcl/fp_proto.hpp +++ b/include/mcl/fp_proto.hpp @@ -19,8 +19,8 @@ void mcl_fp_sub ## len ## S(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl: void mcl_fp_sub ## len ## L(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ void mcl_fp_addNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fp_subNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ -void mcl_fp_mulPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ -void mcl_fp_sqrPre ## len(mcl::fp::Unit* y, const mcl::fp::Unit* x); \ +void mcl_fpDbl_mulPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ +void mcl_fpDbl_sqrPre ## len(mcl::fp::Unit* y, const mcl::fp::Unit* x); \ void mcl_fp_mont ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p, mcl::fp::Unit r); \ void mcl_fp_montRed ## len(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p, mcl::fp::Unit r); diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 24b2d6c..36315cb 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -47,8 +47,8 @@ public: /* mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) */ - static inline void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fp_mulPre(xy.v_, x.v_, y.v_); } - static inline void mod(Fp& z, const FpDblT& xy) { Fp::op_.fp_mod(z.v_, xy.v_); } + static inline void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); } + static inline void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_); } }; /* diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index cd059f9..52a1079 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -106,7 +106,7 @@ struct Op { Unit rp; // z = montRed(xy) void (*montRedPU)(Unit *z, const Unit *xy, const Unit *p, Unit rp); - // z = mont(x, y) = montRed(fp_mulPre(x, y)) + // z = mont(x, y) = montRed(fpDbl_mulPre(x, y)) void (*montPU)(Unit *z, const Unit *x, const Unit *y, const Unit *p, Unit rp); // require p @@ -114,7 +114,7 @@ struct Op { void2uOp fp_invOp; void4u fp_addP; void4u fp_subP; - void3u fp_modP; + void3u fpDbl_modP; FpGenerator *fg; /* @@ -130,9 +130,9 @@ struct Op { /* FpDbl <=> Fp */ - void2u fp_sqrPre; - void3u fp_mulPre; - void2u fp_mod; + void2u fpDbl_sqrPre; + void3u fpDbl_mulPre; + void2u fpDbl_mod; /* for Fp2 = F[u] / (u^2 + 1) @@ -154,12 +154,12 @@ struct Op { , isFullBit(true), fp_addNC(0), fp_subNC(0) , isMont(false), fp_preInv(0) , rp(0), montRedPU(0), montPU(0) - , fp_negP(0), fp_invOp(0), fp_addP(0), fp_subP(0), fp_modP(0) + , fp_negP(0), fp_invOp(0), fp_addP(0), fp_subP(0), fpDbl_modP(0) , fg(createFpGenerator()) , fpDbl_add(0), fpDbl_sub() , fpDbl_addP(0), fpDbl_subP() , fpDbl_addNC(0), fpDbl_subNC() - , fp_sqrPre(0), fp_mulPre(0), fp_mod(0) + , fpDbl_sqrPre(0), fpDbl_mulPre(0), fpDbl_mod(0) , xi_c(0) , fp2_add(0), fp2_sub(0), fp2_mul(0), fp2_neg(0) , fp2_sqr(0), fp2_mul_xi(0) diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp index 2a01f28..a5bc47e 100644 --- a/sample/rawbench.cpp +++ b/sample/rawbench.cpp @@ -52,7 +52,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode) memcpy(uy, fy.getUnit(), sizeof(fy)); double fp_sqrT, fp_addT, fp_subT, fp_mulT; double fpDbl_addT, fpDbl_subT; -// double fp_sqrPreT, fp_mulPreT, fp_modT; +// double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT; // double fp2_mulT, fp2_sqrT; // double fp_addNCT, fp_subNCT, fpDbl_addNCT,fpDbl_subNCT; CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, ux, ux); diff --git a/src/fp.cpp b/src/fp.cpp index 0142ec7..8bd0c23 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -190,7 +190,7 @@ struct OpeFunc { Gmp::getArray(z, N, mz); } // z[N * 2] <- x[N] * y[N] - static inline void fp_mulPreC(Unit *z, const Unit *x, const Unit *y) + static inline void fpDbl_mulPreC(Unit *z, const Unit *x, const Unit *y) { mpz_t mx, my, mz; set_zero(mz, z, N * 2); @@ -200,7 +200,7 @@ struct OpeFunc { clearArray(z, mz->_mp_size, N * 2); } // y[N * 2] <- x[N]^2 - static inline void fp_sqrPreC(Unit *y, const Unit *x) + static inline void fpDbl_sqrPreC(Unit *y, const Unit *x) { mpz_t mx, my; set_zero(my, y, N * 2); @@ -209,7 +209,7 @@ struct OpeFunc { clearArray(y, my->_mp_size, N * 2); } // y[N] <- x[N * 2] mod p[N] - static inline void fp_modPC(Unit *y, const Unit *x, const Unit *p) + static inline void fpDbl_modPC(Unit *y, const Unit *x, const Unit *p) { mpz_t mx, my, mp; set_mpz_t(mx, x, N * 2); @@ -258,9 +258,9 @@ struct OpeFunc { fp_addNC = mcl_fp_addNC ## n; \ fp_subNC = mcl_fp_subNC ## n; \ } \ - fp_mulPre = mcl_fp_mulPre ## n; \ + fpDbl_mulPre = mcl_fpDbl_mulPre ## n; \ if (n <= 256) { \ - fp_sqrPre = mcl_fp_sqrPre ## n; \ + fpDbl_sqrPre = mcl_fpDbl_sqrPre ## n; \ } \ montPU = mcl_fp_mont ## n; \ montRedPU = mcl_fp_montRed ## n; \ @@ -311,9 +311,9 @@ struct OpeFunc { fpDbl_subNC = OpeFunc::fp_subNCC; \ } \ } \ - fp_mulPre = OpeFunc::fp_mulPreC; \ - fp_sqrPre = OpeFunc::fp_sqrPreC; \ - fp_modP = OpeFunc::fp_modPC; \ + fpDbl_mulPre = OpeFunc::fpDbl_mulPreC; \ + fpDbl_sqrPre = OpeFunc::fpDbl_sqrPreC; \ + fpDbl_modP = OpeFunc::fpDbl_modPC; \ SET_OP_LLVM(n) #ifdef MCL_USE_XBYAK diff --git a/src/mul.txt b/src/mul.txt index d62c53e..cbde576 100644 --- a/src/mul.txt +++ b/src/mul.txt @@ -18,7 +18,7 @@ noinline @endfor ret i$(bu) %t$(N-2) } -define void @mcl_fp_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) { +define void @mcl_fpDbl_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) { %x = load i$(bit)* %px %y = load i$(bit)* %py @for i, 0, N @@ -42,7 +42,7 @@ define void @mcl_fp_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) { store i$(bu) %sum$(N-1), i$(bu)* %p ret void } -define void @mcl_fp_sqrPre$(bit)(i$(unit)* %py, i$(bit)* %px) { +define void @mcl_fpDbl_sqrPre$(bit)(i$(unit)* %py, i$(bit)* %px) { %x = load i$(bit)* %px @for i, 0, N %x$(i) = call i$(unit) @extract$(bit)(i$(bit) %x, i$(bit) $(unit*i)) diff --git a/src/once.txt b/src/once.txt index f3c42ac..05c242d 100644 --- a/src/once.txt +++ b/src/once.txt @@ -30,7 +30,7 @@ define internal i64 @extract192to64(i192 %x, i192 %shift) { ret i64 %t1 } -define internal void @modNIST_P192(i192* %out, i192* %px) { +define internal void @fpDbl_modNIST_P192(i192* %out, i192* %px) { %L192 = load i192* %px %L = zext i192 %L192 to i256 @@ -68,7 +68,7 @@ define internal void @modNIST_P192(i192* %out, i192* %px) { define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) { %buf = alloca i192, i32 2 %p = bitcast i192* %buf to i$(unit)* - call void @mcl_fp_mulPre192(i$(unit)* %p, i192* %px, i192* %py) - call void @modNIST_P192(i192* %pz, i192* %buf) + call void @mcl_fpDbl_mulPre192(i$(unit)* %p, i192* %px, i192* %py) + call void @fpDbl_modNIST_P192(i192* %pz, i192* %buf) ret void }