From 52a9f4d2135782a43fc2bf64a880ef97232e8f27 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 8 Feb 2021 15:28:31 +0900 Subject: [PATCH] remove mulPreC --- include/mcl/fp_tower.hpp | 9 +++------ include/mcl/op.hpp | 2 -- src/fp_generator.hpp | 28 +++++++++++++--------------- src/fp_static_code.hpp | 2 +- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 65aedd7..730a044 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -113,24 +113,22 @@ public: static void (*mod)(Fp& z, const FpDblT& xy); static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); - static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y); static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } - static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); } #else static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } - static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); } #endif /* mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) */ + static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y); static void (*sqrPre)(FpDblT& xx, const Fp& x); static void mulUnit(FpDblT& z, const FpDblT& x, Unit y) { @@ -151,9 +149,8 @@ public: if (addPre == 0) addPre = addPreC; subPre = fp::func_ptr_cast(op.fpDbl_subPre); if (subPre == 0) subPre = subPreC; - mulPre = fp::func_ptr_cast(op.fpDbl_mulPreA_); - if (mulPre == 0) mulPre = mulPreC; #endif + mulPre = fp::func_ptr_cast(op.fpDbl_mulPre); sqrPre = fp::func_ptr_cast(op.fpDbl_sqrPre); } void operator+=(const FpDblT& x) { add(*this, *this, x); } @@ -166,8 +163,8 @@ template void (*FpDblT::sub)(FpDblT&, const FpDblT&, const FpDblT& template void (*FpDblT::mod)(Fp&, const FpDblT&); template void (*FpDblT::addPre)(FpDblT&, const FpDblT&, const FpDblT&); template void (*FpDblT::subPre)(FpDblT&, const FpDblT&, const FpDblT&); -template void (*FpDblT::mulPre)(FpDblT&, const Fp&, const Fp&); #endif +template void (*FpDblT::mulPre)(FpDblT&, const Fp&, const Fp&); template void (*FpDblT::sqrPre)(FpDblT&, const Fp&); template struct Fp12T; diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index b8c1dbe..e3d78d8 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -221,7 +221,6 @@ struct Op { void2u fp2_sqrA_; void3u fpDbl_addA_; void3u fpDbl_subA_; - void3u fpDbl_mulPreA_; void2u fpDbl_modA_; void3u fp2Dbl_mulPreA_; void2u fp2Dbl_sqrPreA_; @@ -307,7 +306,6 @@ struct Op { fp2_sqrA_ = 0; fpDbl_addA_ = 0; fpDbl_subA_ = 0; - fpDbl_mulPreA_ = 0; fpDbl_modA_ = 0; fp2Dbl_mulPreA_ = 0; fp2Dbl_sqrPreA_ = 0; diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index 5ab7f9c..ef38b63 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -421,11 +421,11 @@ private: setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr()); align(16); - op.fpDbl_mulPreA_ = gen_fpDbl_mulPre(); - setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr()); + gen_fpDbl_mulPre(op.fpDbl_mulPre); + setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr()); align(16); - op.fpDbl_sqrPre = gen_fpDbl_sqrPre(); + gen_fpDbl_sqrPre(op.fpDbl_sqrPre); setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr()); align(16); @@ -2373,36 +2373,35 @@ private: cmovc_rr(zp, keep); store_mr(z, zp); } - void2u gen_fpDbl_sqrPre() + void gen_fpDbl_sqrPre(void2u& f) { void2u func = getCurr(); if (pn_ == 2 && useMulx_) { StackFrame sf(this, 2, 7 | UseRDX); sqrPre2(sf.p[0], sf.p[1], sf.t); - return func; + f = func; } if (pn_ == 3) { StackFrame sf(this, 3, 10 | UseRDX); Pack t = sf.t; t.append(sf.p[2]); sqrPre3(sf.p[0], sf.p[1], t); - return func; + f = func; } if (pn_ == 4 && useMulx_) { StackFrame sf(this, 3, 10 | UseRDX); Pack t = sf.t; t.append(sf.p[2]); sqrPre4(sf.p[0], sf.p[1], t); - return func; + f = func; } if (pn_ == 6 && useMulx_ && useAdx_) { StackFrame sf(this, 3, 10 | UseRDX, 6 * 8); Pack t = sf.t; t.append(sf.p[2]); sqrPre6(sf.p[0], sf.p[1], t); - return func; + f = func; } - return 0; #if 0 #ifdef XBYAK64_WIN mov(r8, rdx); @@ -2413,18 +2412,18 @@ private: return func; #endif } - void3u gen_fpDbl_mulPre() + void gen_fpDbl_mulPre(void3u& f) { void3u func = getCurr(); if (pn_ == 2 && useMulx_) { StackFrame sf(this, 3, 5 | UseRDX); mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t); - return func; + f = func; } if (pn_ == 3) { StackFrame sf(this, 3, 10 | UseRDX); mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t); - return func; + f = func; } if (pn_ == 4) { /* @@ -2437,7 +2436,7 @@ private: L(mulPreL); // called only from asm code mulPre4(gp0, gp1, gp2, sf.t); ret(); - return func; + f = func; } if (pn_ == 6 && useAdx_) { StackFrame sf(this, 3, 10 | UseRDX, 0, false); @@ -2446,9 +2445,8 @@ private: L(mulPreL); // called only from asm code mulPre6(sf.t); ret(); - return func; + f = func; } - return 0; } static inline void debug_put_inner(const uint64_t *ptr, int n) { diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp index 6584410..7421f0a 100644 --- a/src/fp_static_code.hpp +++ b/src/fp_static_code.hpp @@ -65,7 +65,7 @@ void setStaticCode(mcl::fp::Op& op) op.fpDbl_subA_ = mclx_FpDbl_sub; op.fpDbl_addPre = mclx_FpDbl_addPre; op.fpDbl_subPre = mclx_FpDbl_subPre; - op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre; + op.fpDbl_mulPre = mclx_FpDbl_mulPre; op.fpDbl_sqrPre = mclx_FpDbl_sqrPre; op.fp2_addA_ = mclx_Fp2_add; op.fp2_subA_ = mclx_Fp2_sub;