// Patch summary (unified diff over five files: include/mcl/fp_tower.hpp,
// include/mcl/op.hpp, src/fp.cpp, src/fp_generator.hpp, src/fp_static_code.hpp).
//
// Purpose: drop the separate Op::fpDbl_mulPreA_ / Op::fpDbl_sqrPreA_ slots and have
// every producer write straight into op.fpDbl_mulPre / op.fpDbl_sqrPre.
//
// include/mcl/fp_tower.hpp
//   - Removes the mulPreC / sqrPreC wrappers.
//   - Replaces the "use fpDbl_*PreA_ if set, else *PreC" selection with an
//     unconditional assignment of mulPre / sqrPre from op.fpDbl_mulPre / op.fpDbl_sqrPre.
//   - Renames fp2Dbl_mulPreW to fp2Dbl_mulPreTW (definition and both uses).
// include/mcl/op.hpp
//   - Removes the fpDbl_mulPreA_ / fpDbl_sqrPreA_ members and their `= 0` resets.
// src/fp.cpp
//   - setWasmOp: adds a commented-out `op.fpDbl_sqrPre = sqrT;` line (no active change).
// src/fp_generator.hpp
//   - gen_fpDbl_sqrPre / gen_fpDbl_mulPre change from returning the generated pointer
//     (0 when no case applied) to `void` with an out-reference `f`. Each branch now does
//     `f = func;` instead of `return func;`; when no branch applies `f` is left untouched.
//   - Call sites pass op.fpDbl_sqrPre / op.fpDbl_mulPre as that out-reference.
// src/fp_static_code.hpp
//   - setStaticCode assigns mclx_FpDbl_mulPre / mclx_FpDbl_sqrPre to
//     op.fpDbl_mulPre / op.fpDbl_sqrPre.
//
// NOTE(review): this copy of the patch looks whitespace-collapsed, and template argument
//   lists seem to be missing (e.g. `template - static void`, `func_ptr_cast(`) - confirm
//   against the original patch before applying.
// NOTE(review): the gen_* branches no longer return early, so control falls through to the
//   following `if`s. The visible conditions test distinct pn_ values (2, 3, 4, 6), so this
//   looks harmless - confirm no branch is hidden in the elided hunk context.
// NOTE(review): with mulPreC / sqrPreC gone, FpDblT has no null fallback for mulPre / sqrPre;
//   presumably op.fpDbl_mulPre / op.fpDbl_sqrPre are always populated before init runs -
//   verify, especially for setWasmOp where the fpDbl_sqrPre assignment is commented out.
// NOTE(review): a `return func;` appears to remain inside the `#if 0` region of the now-void
//   gen_fpDbl_sqrPre; it is compiled out but would not build if that region were re-enabled.
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 4802038..26d96e1 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -125,8 +125,6 @@ public: static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } #endif - static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); } - static void sqrPreC(FpDblT& xx, const Fp& x) { Fp::op_.fpDbl_sqrPre(xx.v_, x.v_); } /* mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) */ @@ -152,16 +150,8 @@ public: subPre = fp::func_ptr_cast(op.fpDbl_subPre); if (subPre == 0) subPre = subPreC; #endif - if (op.fpDbl_mulPreA_) { - mulPre = fp::func_ptr_cast(op.fpDbl_mulPreA_); - } else { - mulPre = mulPreC; - } - if (op.fpDbl_sqrPreA_) { - sqrPre = fp::func_ptr_cast(op.fpDbl_sqrPreA_); - } else { - sqrPre = sqrPreC; - } + mulPre = fp::func_ptr_cast(op.fpDbl_mulPre); + sqrPre = fp::func_ptr_cast(op.fpDbl_sqrPre); } void operator+=(const FpDblT& x) { add(*this, *this, x); } void operator-=(const FpDblT& x) { sub(*this, *this, x); } @@ -690,9 +680,9 @@ struct Fp2DblT { mulPre = fp::func_ptr_cast(op.fp2Dbl_mulPreA_); } else { if (op.isFullBit) { - mulPre = fp2Dbl_mulPreW; + mulPre = fp2Dbl_mulPreTW; } else { - mulPre = fp2Dbl_mulPreW; + mulPre = fp2Dbl_mulPreTW; } } if (op.fp2Dbl_sqrPreA_) { @@ -710,7 +700,7 @@ struct Fp2DblT { @note mod of NIST_P192 is fast */ template - static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y) + static void fp2Dbl_mulPreTW(Fp2DblT& z, const Fp2& x, const Fp2& y) { const Fp& a = x.a; const Fp& b = x.b; diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 29ca9f8..e3d78d8 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -221,8 +221,6 @@ struct Op { void2u fp2_sqrA_; void3u fpDbl_addA_; void3u fpDbl_subA_; - void3u fpDbl_mulPreA_; - void2u fpDbl_sqrPreA_; 
void2u fpDbl_modA_; void3u fp2Dbl_mulPreA_; void2u fp2Dbl_sqrPreA_; @@ -308,8 +306,6 @@ struct Op { fp2_sqrA_ = 0; fpDbl_addA_ = 0; fpDbl_subA_ = 0; - fpDbl_mulPreA_ = 0; - fpDbl_sqrPreA_ = 0; fpDbl_modA_ = 0; fp2Dbl_mulPreA_ = 0; fp2Dbl_sqrPreA_ = 0; diff --git a/src/fp.cpp b/src/fp.cpp index cd3266e..0534580 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -426,6 +426,7 @@ void setWasmOp(Op& op) op.fp_mul = mcl::mulMontT; op.fp_sqr = mcl::sqrMontT; op.fpDbl_mulPre = mulT; +// op.fpDbl_sqrPre = sqrT; op.fpDbl_mod = modT; } #endif diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index 3227d93..ef38b63 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -421,12 +421,12 @@ private: setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr()); align(16); - op.fpDbl_mulPreA_ = gen_fpDbl_mulPre(); - setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr()); + gen_fpDbl_mulPre(op.fpDbl_mulPre); + setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr()); align(16); - op.fpDbl_sqrPreA_ = gen_fpDbl_sqrPre(); - setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPreA_, getCurr()); + gen_fpDbl_sqrPre(op.fpDbl_sqrPre); + setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr()); align(16); op.fp2_addA_ = gen_fp2_add(); @@ -2373,36 +2373,35 @@ private: cmovc_rr(zp, keep); store_mr(z, zp); } - void2u gen_fpDbl_sqrPre() + void gen_fpDbl_sqrPre(void2u& f) { void2u func = getCurr(); if (pn_ == 2 && useMulx_) { StackFrame sf(this, 2, 7 | UseRDX); sqrPre2(sf.p[0], sf.p[1], sf.t); - return func; + f = func; } if (pn_ == 3) { StackFrame sf(this, 3, 10 | UseRDX); Pack t = sf.t; t.append(sf.p[2]); sqrPre3(sf.p[0], sf.p[1], t); - return func; + f = func; } if (pn_ == 4 && useMulx_) { StackFrame sf(this, 3, 10 | UseRDX); Pack t = sf.t; t.append(sf.p[2]); sqrPre4(sf.p[0], sf.p[1], t); - return func; + f = func; } if (pn_ == 6 && useMulx_ && useAdx_) { StackFrame sf(this, 3, 10 | UseRDX, 6 * 8); Pack t = sf.t; t.append(sf.p[2]); sqrPre6(sf.p[0], 
sf.p[1], t); - return func; + f = func; } - return 0; #if 0 #ifdef XBYAK64_WIN mov(r8, rdx); @@ -2413,18 +2412,18 @@ private: return func; #endif } - void3u gen_fpDbl_mulPre() + void gen_fpDbl_mulPre(void3u& f) { void3u func = getCurr(); if (pn_ == 2 && useMulx_) { StackFrame sf(this, 3, 5 | UseRDX); mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t); - return func; + f = func; } if (pn_ == 3) { StackFrame sf(this, 3, 10 | UseRDX); mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t); - return func; + f = func; } if (pn_ == 4) { /* @@ -2437,7 +2436,7 @@ private: L(mulPreL); // called only from asm code mulPre4(gp0, gp1, gp2, sf.t); ret(); - return func; + f = func; } if (pn_ == 6 && useAdx_) { StackFrame sf(this, 3, 10 | UseRDX, 0, false); @@ -2446,9 +2445,8 @@ private: L(mulPreL); // called only from asm code mulPre6(sf.t); ret(); - return func; + f = func; } - return 0; } static inline void debug_put_inner(const uint64_t *ptr, int n) { diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp index 09d4d01..7421f0a 100644 --- a/src/fp_static_code.hpp +++ b/src/fp_static_code.hpp @@ -65,8 +65,8 @@ void setStaticCode(mcl::fp::Op& op) op.fpDbl_subA_ = mclx_FpDbl_sub; op.fpDbl_addPre = mclx_FpDbl_addPre; op.fpDbl_subPre = mclx_FpDbl_subPre; - op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre; - op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre; + op.fpDbl_mulPre = mclx_FpDbl_mulPre; + op.fpDbl_sqrPre = mclx_FpDbl_sqrPre; op.fp2_addA_ = mclx_Fp2_add; op.fp2_subA_ = mclx_Fp2_sub; op.fp2_negA_ = mclx_Fp2_neg;