From 7806835e9c03543e88f4f7a0f52b8761ff48e241 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 7 May 2021 17:27:20 +0900 Subject: [PATCH 01/23] [pedantic] avoid undefined behavior of abs --- include/mcl/fp.hpp | 2 +- include/mcl/util.hpp | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 706d9fa..f41d4f8 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -227,7 +227,7 @@ public: } else { clear(); if (x) { - int64_t y = x < 0 ? -x : x; + uint64_t y = fp::abs_(x); if (sizeof(Unit) == 8) { v_[0] = y; } else { diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp index 8915c88..b35801e 100644 --- a/include/mcl/util.hpp +++ b/include/mcl/util.hpp @@ -17,8 +17,21 @@ namespace mcl { namespace fp { // some environments do not have utility -template -T abs_(T x) { return x < 0 ? -x : x; } +inline uint32_t abs_(int32_t x) +{ + if (x >= 0) return uint32_t(x); + // avoid undefined behavior + if (x == -2147483647 - 1) return 2147483648u; + return uint32_t(-x); +} + +inline uint64_t abs_(int64_t x) +{ + if (x >= 0) return uint64_t(x); + // avoid undefined behavior + if (x == -9223372036854775807ll - 1) return 9223372036854775808ull; + return uint64_t(-x); +} template T min_(T x, T y) { return x < y ? x : y; } From 093e916151f8728071d97fb91c4e87c99a6b6c5e Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Fri, 7 May 2021 17:39:20 +0900 Subject: [PATCH 02/23] fix for (bitlen(p) + 1) % 64 = 0 --- include/mcl/gmp_util.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp index e444993..4111c37 100644 --- a/include/mcl/gmp_util.hpp +++ b/include/mcl/gmp_util.hpp @@ -960,6 +960,7 @@ struct SmallModp { } uint32_t getTop(const Unit *x) const { + if (shiftR_ == 0) return x[N_ - 1]; return (x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_); } uint32_t cvtInt(const mpz_class& x) const From 1653541c0a9e484083d2025ced19b1120d637ec3 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Sun, 9 May 2021 17:57:35 +0900 Subject: [PATCH 03/23] fix init bool --- include/mcl/fp_tower.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 451436b..93d5654 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -442,6 +442,7 @@ public: Fp2T::mul(g2[i], t, g[i]); g3[i] = g[i] * g2[i]; } + *pb = true; } #ifndef CYBOZU_DONT_USE_EXCEPTION static void init() From 5dcd5c53b7e496ca8eac4ccc5e154941d8146592 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Sun, 9 May 2021 18:34:10 +0900 Subject: [PATCH 04/23] avoid cast of fpDbl_add --- include/mcl/fp_tower.hpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 93d5654..5b039a1 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -113,19 +113,25 @@ public: { gmp::setArray(pb, x, v_, Fp::op_.N * 2); } + static inline void add(FpDblT& z, const FpDblT& x, const FpDblT& y) + { +#ifdef MCL_XBYAK_DIRECT_CALL + Fp::op_.fpDbl_addA_(z.v_, x.v_, y.v_); +#else + Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); +#endif + } #ifdef MCL_XBYAK_DIRECT_CALL - static void (*add)(FpDblT& z, const FpDblT& x, const FpDblT& y); + static void addA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_add(z, x, y, Fp::op_.p); } static void (*sub)(FpDblT& z, const FpDblT& x, const FpDblT& y); static void (*mod)(Fp& z, const FpDblT& xy); static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); - static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } #else - static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } @@ -152,9 +158,10 @@ public: static void init() { #ifdef MCL_XBYAK_DIRECT_CALL - const mcl::fp::Op& op = Fp::getOp(); - add = fp::func_ptr_cast(op.fpDbl_addA_); - if (add == 0) add = addC; + mcl::fp::Op& op = Fp::getOpNonConst(); + if (op.fpDbl_addA_ == 0) { + op.fpDbl_addA_ = addA; + } sub = fp::func_ptr_cast(op.fpDbl_subA_); if (sub == 0) sub = subC; mod = fp::func_ptr_cast(op.fpDbl_modA_); @@ -170,7 +177,6 @@ public: }; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*FpDblT::add)(FpDblT&, const FpDblT&, const FpDblT&); template void (*FpDblT::sub)(FpDblT&, const FpDblT&, const FpDblT&); template void (*FpDblT::mod)(Fp&, const FpDblT&); template void (*FpDblT::addPre)(FpDblT&, const FpDblT&, const FpDblT&); From 52a83ac5267f48d1dc2b9d15eb2fb675dbc40239 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 09:25:43 +0900 Subject: [PATCH 05/23] avoid cast of fpDbl_sub --- include/mcl/fp_tower.hpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 5b039a1..61ce62b 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -119,20 +119,26 @@ public: Fp::op_.fpDbl_addA_(z.v_, x.v_, y.v_); #else Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); +#endif + } + static inline void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) + { +#ifdef MCL_XBYAK_DIRECT_CALL + Fp::op_.fpDbl_subA_(z.v_, x.v_, y.v_); +#else + Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); #endif } #ifdef MCL_XBYAK_DIRECT_CALL static void addA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_add(z, x, y, Fp::op_.p); } - static void (*sub)(FpDblT& z, const FpDblT& x, const FpDblT& y); + static void subA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_sub(z, x, y, Fp::op_.p); } static void (*mod)(Fp& z, const FpDblT& xy); static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); - static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } #else - static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } @@ -162,8 +168,9 @@ public: if (op.fpDbl_addA_ == 0) { op.fpDbl_addA_ = addA; } - sub = fp::func_ptr_cast(op.fpDbl_subA_); - if (sub == 0) sub = subC; + if (op.fpDbl_subA_ == 0) { + op.fpDbl_subA_ = subA; + } mod = fp::func_ptr_cast(op.fpDbl_modA_); if (mod == 0) mod = modC; addPre = fp::func_ptr_cast(op.fpDbl_addPre); @@ -177,7 +184,6 @@ public: }; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*FpDblT::sub)(FpDblT&, const FpDblT&, const FpDblT&); template void (*FpDblT::mod)(Fp&, const FpDblT&); template void (*FpDblT::addPre)(FpDblT&, const FpDblT&, const FpDblT&); template void (*FpDblT::subPre)(FpDblT&, const FpDblT&, const FpDblT&); From 55cd686b2259a9ca09d833e061582db69903c0ae Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 09:32:12 +0900 Subject: [PATCH 06/23] avoid cast of fpDbl_mod --- include/mcl/fp_tower.hpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 61ce62b..3acff3f 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -127,19 +127,25 @@ public: Fp::op_.fpDbl_subA_(z.v_, x.v_, y.v_); #else Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); +#endif + } + static inline void mod(Fp& z, const FpDblT& xy) + { +#ifdef MCL_XBYAK_DIRECT_CALL + Fp::op_.fpDbl_modA_(z.v_, xy.v_); +#else + Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); #endif } #ifdef MCL_XBYAK_DIRECT_CALL static void addA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_add(z, x, y, Fp::op_.p); } static void subA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_sub(z, x, y, Fp::op_.p); } - static void (*mod)(Fp& z, const FpDblT& xy); + static void modA(Unit *z, const Unit *xy) { Fp::op_.fpDbl_mod(z, xy, Fp::op_.p); } static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); - static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } #else - static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } #endif @@ -171,8 +177,9 @@ public: if (op.fpDbl_subA_ == 0) { op.fpDbl_subA_ = subA; } - mod = fp::func_ptr_cast(op.fpDbl_modA_); - if (mod == 0) mod = modC; + if (op.fpDbl_modA_ == 0) { + op.fpDbl_modA_ = modA; + } addPre = fp::func_ptr_cast(op.fpDbl_addPre); if (addPre == 0) addPre = addPreC; subPre = fp::func_ptr_cast(op.fpDbl_subPre); @@ -184,7 +191,6 @@ public: }; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*FpDblT::mod)(Fp&, const FpDblT&); template void (*FpDblT::addPre)(FpDblT&, const FpDblT&, const FpDblT&); template void (*FpDblT::subPre)(FpDblT&, const FpDblT&, const FpDblT&); #endif From 676488081a60c1371511bd00388de81acc0d8801 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 10:03:41 +0900 Subject: [PATCH 07/23] avoid cast of fpDbl_addPre/subPre --- include/mcl/fp_tower.hpp | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 3acff3f..31f97c8 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -141,27 +141,14 @@ public: static void addA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_add(z, x, y, Fp::op_.p); } static void subA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_sub(z, x, y, Fp::op_.p); } static void modA(Unit *z, const Unit *xy) { Fp::op_.fpDbl_mod(z, xy, Fp::op_.p); } - static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); - static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); - static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } - static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } -#else +#endif static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } -#endif /* mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) */ - static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) - { - const mcl::fp::Op& op = Fp::getOp(); - op.fpDbl_mulPre(xy.v_, x.v_, y.v_); - } - static void sqrPre(FpDblT& xx, const Fp& x) - { - const mcl::fp::Op& op = Fp::getOp(); - op.fpDbl_sqrPre(xx.v_, x.v_); - } + static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); } + static void sqrPre(FpDblT& xx, const Fp& x) { Fp::op_.fpDbl_sqrPre(xx.v_, x.v_); } static void mulUnit(FpDblT& z, const FpDblT& x, Unit y) { if (mulSmallUnit(z, x, y)) return; @@ -170,7 +157,7 @@ public: static void init() { #ifdef MCL_XBYAK_DIRECT_CALL - mcl::fp::Op& op = Fp::getOpNonConst(); + mcl::fp::Op& op = Fp::op_; if (op.fpDbl_addA_ == 0) { op.fpDbl_addA_ = addA; } @@ -180,21 +167,12 @@ public: if (op.fpDbl_modA_ == 0) { op.fpDbl_modA_ = modA; } - addPre = fp::func_ptr_cast(op.fpDbl_addPre); - if (addPre == 0) addPre = addPreC; - subPre = fp::func_ptr_cast(op.fpDbl_subPre); - if (subPre == 0) subPre = subPreC; #endif } void operator+=(const FpDblT& x) { add(*this, *this, x); } void operator-=(const FpDblT& x) { sub(*this, *this, x); } }; -#ifdef MCL_XBYAK_DIRECT_CALL -template void (*FpDblT::addPre)(FpDblT&, const FpDblT&, const FpDblT&); -template void (*FpDblT::subPre)(FpDblT&, const FpDblT&, const FpDblT&); -#endif - /* beta = -1 Fp2 = F[i] / (i^2 + 1) From bb83774fadbaa0b6daf137700b325bc121df2c79 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 11:12:00 +0900 Subject: [PATCH 08/23] avoid cast of Fp2::add --- include/mcl/fp_tower.hpp | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 31f97c8..41039ba 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -223,15 +223,21 @@ public: a = a_; b = b_; } + static void add(Fp2T& z, const Fp2T& x, const Fp2T& y) + { +#ifdef MCL_XBYAK_DIRECT_CALL + Fp::op_.fp2_addA_(z.a.v_, x.a.v_, y.a.v_); +#else + addA(z.a.v_, x.a.v_, y.a.v_); +#endif + } #ifdef MCL_XBYAK_DIRECT_CALL - static void (*add)(Fp2T& z, const Fp2T& x, const Fp2T& y); static void (*sub)(Fp2T& z, const Fp2T& x, const Fp2T& y); static void (*neg)(Fp2T& y, const Fp2T& x); static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); static void (*sqr)(Fp2T& y, const Fp2T& x); static void (*mul2)(Fp2T& y, const Fp2T& x); #else - static void add(Fp2T& z, const Fp2T& x, const Fp2T& y) { addC(z, x, y); } static void sub(Fp2T& z, const Fp2T& x, const Fp2T& y) { subC(z, x, y); } static void neg(Fp2T& y, const Fp2T& x) { negC(y, x); } static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) { mulC(z, x, y); } @@ -376,7 +382,6 @@ public: static uint32_t get_xi_a() { return Fp::getOp().xi_a; } static void init(bool *pb) { -// assert(Fp::maxSize <= 256); mcl::fp::Op& op = Fp::op_; assert(op.xi_a); // assume p < W/4 where W = 1 << (N * sizeof(Unit) * 8) @@ -386,8 +391,9 @@ public: } mul_xi = 0; #ifdef MCL_XBYAK_DIRECT_CALL - add = fp::func_ptr_cast(op.fp2_addA_); - if (add == 0) add = addC; + if (op.fp2_addA_ == 0) { + op.fp2_addA_ = addA; + } sub = fp::func_ptr_cast(op.fp2_subA_); if (sub == 0) sub = subC; neg = fp::func_ptr_cast(op.fp2_negA_); @@ -487,8 +493,11 @@ private: default Fp2T operator Fp2T = Fp[i]/(i^2 + 1) */ - static void addC(Fp2T& z, const Fp2T& x, const Fp2T& y) + static void addA(Unit *pz, const Unit *px, const Unit *py) { + Fp2T& z = *reinterpret_cast(pz); + const Fp2T& x = *reinterpret_cast(px); + const Fp2T& y = *reinterpret_cast(py); Fp::add(z.a, x.a, y.a); Fp::add(z.b, x.b, y.b); } @@ -594,7 +603,6 @@ private: }; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*Fp2T::add)(Fp2T& z, const Fp2T& x, const Fp2T& y); template void (*Fp2T::sub)(Fp2T& z, const Fp2T& x, const Fp2T& y); template void (*Fp2T::neg)(Fp2T& y, const Fp2T& x); template void (*Fp2T::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); From a36e6f276bfcf6c42634a3c50c897feac6742723 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 11:14:02 +0900 Subject: [PATCH 09/23] avoid cast of Fp2::sub --- include/mcl/fp_tower.hpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 41039ba..04c972e 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -229,16 +229,22 @@ public: Fp::op_.fp2_addA_(z.a.v_, x.a.v_, y.a.v_); #else addA(z.a.v_, x.a.v_, y.a.v_); +#endif + } + static void sub(Fp2T& z, const Fp2T& x, const Fp2T& y) + { +#ifdef MCL_XBYAK_DIRECT_CALL + Fp::op_.fp2_subA_(z.a.v_, x.a.v_, y.a.v_); +#else + subA(z.a.v_, x.a.v_, y.a.v_); #endif } #ifdef MCL_XBYAK_DIRECT_CALL - static void (*sub)(Fp2T& z, const Fp2T& x, const Fp2T& y); static void (*neg)(Fp2T& y, const Fp2T& x); static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); static void (*sqr)(Fp2T& y, const Fp2T& x); static void (*mul2)(Fp2T& y, const Fp2T& x); #else - static void sub(Fp2T& z, const Fp2T& x, const Fp2T& y) { subC(z, x, y); } static void neg(Fp2T& y, const Fp2T& x) { negC(y, x); } static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) { mulC(z, x, y); } static void sqr(Fp2T& y, const Fp2T& x) { sqrC(y, x); } @@ -394,8 +400,9 @@ public: if (op.fp2_addA_ == 0) { op.fp2_addA_ = addA; } - sub = fp::func_ptr_cast(op.fp2_subA_); - if (sub == 0) sub = subC; + if (op.fp2_subA_ == 0) { + op.fp2_subA_ = subA; + } neg = fp::func_ptr_cast(op.fp2_negA_); if (neg == 0) neg = negC; mul = fp::func_ptr_cast(op.fp2_mulA_); @@ -501,8 +508,11 @@ private: Fp::add(z.a, x.a, y.a); Fp::add(z.b, x.b, y.b); } - static void subC(Fp2T& z, const Fp2T& x, const Fp2T& y) + static void subA(Unit *pz, const Unit *px, const Unit *py) { + Fp2T& z = *reinterpret_cast(pz); + const Fp2T& x = *reinterpret_cast(px); + const Fp2T& y = *reinterpret_cast(py); Fp::sub(z.a, x.a, y.a); Fp::sub(z.b, x.b, y.b); } @@ -603,7 +613,6 @@ private: }; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*Fp2T::sub)(Fp2T& z, const Fp2T& x, const Fp2T& y); template void (*Fp2T::neg)(Fp2T& y, const Fp2T& x); template void (*Fp2T::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); template void (*Fp2T::sqr)(Fp2T& y, const Fp2T& x); From 76e87c23c679725811fe2d1406e12e246e84b264 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 11:32:21 +0900 Subject: [PATCH 10/23] avoid cast of Fp2::neg --- include/mcl/fp_tower.hpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 04c972e..3d2c317 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -237,15 +237,21 @@ public: Fp::op_.fp2_subA_(z.a.v_, x.a.v_, y.a.v_); #else subA(z.a.v_, x.a.v_, y.a.v_); +#endif + } + static void neg(Fp2T& y, const Fp2T& x) + { +#ifdef MCL_XBYAK_DIRECT_CALL + Fp::op_.fp2_negA_(y.a.v_, x.a.v_); +#else + negA(y.a.v_, x.a.v_); #endif } #ifdef MCL_XBYAK_DIRECT_CALL - static void (*neg)(Fp2T& y, const Fp2T& x); static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); static void (*sqr)(Fp2T& y, const Fp2T& x); static void (*mul2)(Fp2T& y, const Fp2T& x); #else - static void neg(Fp2T& y, const Fp2T& x) { negC(y, x); } static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) { mulC(z, x, y); } static void sqr(Fp2T& y, const Fp2T& x) { sqrC(y, x); } static void mul2(Fp2T& y, const Fp2T& x) { mul2C(y, x); } @@ -403,8 +409,9 @@ public: if (op.fp2_subA_ == 0) { op.fp2_subA_ = subA; } - neg = fp::func_ptr_cast(op.fp2_negA_); - if (neg == 0) neg = negC; + if (op.fp2_negA_ == 0) { + op.fp2_negA_ = negA; + } mul = fp::func_ptr_cast(op.fp2_mulA_); if (mul == 0) mul = mulC; sqr = fp::func_ptr_cast(op.fp2_sqrA_); @@ -516,8 +523,10 @@ private: Fp::sub(z.a, x.a, y.a); Fp::sub(z.b, x.b, y.b); } - static void negC(Fp2T& y, const Fp2T& x) + static void negA(Unit *py, const Unit *px) { + Fp2T& y = *reinterpret_cast(py); + const Fp2T& x = *reinterpret_cast(px); Fp::neg(y.a, x.a); Fp::neg(y.b, x.b); } @@ -613,7 +622,6 @@ private: }; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*Fp2T::neg)(Fp2T& y, const Fp2T& x); template void (*Fp2T::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); template void (*Fp2T::sqr)(Fp2T& y, const Fp2T& x); template void (*Fp2T::mul2)(Fp2T& y, const Fp2T& x); From 646f008ded65498f52173d32e6b888f8dc23cc2f Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 11:40:28 +0900 Subject: [PATCH 11/23] avoid cast of Fp2::mul --- include/mcl/fp_tower.hpp | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 3d2c317..bdd0cdc 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -245,14 +245,20 @@ public: Fp::op_.fp2_negA_(y.a.v_, x.a.v_); #else negA(y.a.v_, x.a.v_); +#endif + } + static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) + { +#ifdef MCL_XBYAK_DIRECT_CALL + Fp::op_.fp2_mulA_(z.a.v_, x.a.v_, y.a.v_); +#else + mulA(z.a.v_, x.a.v_, y.a.v_); #endif } #ifdef MCL_XBYAK_DIRECT_CALL - static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); static void (*sqr)(Fp2T& y, const Fp2T& x); static void (*mul2)(Fp2T& y, const Fp2T& x); #else - static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) { mulC(z, x, y); } static void sqr(Fp2T& y, const Fp2T& x) { sqrC(y, x); } static void mul2(Fp2T& y, const Fp2T& x) { mul2C(y, x); } #endif @@ -412,8 +418,9 @@ public: if (op.fp2_negA_ == 0) { op.fp2_negA_ = negA; } - mul = fp::func_ptr_cast(op.fp2_mulA_); - if (mul == 0) mul = mulC; + if (op.fp2_mulA_ == 0) { + op.fp2_mulA_ = mulA; + } sqr = fp::func_ptr_cast(op.fp2_sqrA_); if (sqr == 0) sqr = sqrC; mul2 = fp::func_ptr_cast(op.fp2_mul2A_); @@ -530,18 +537,21 @@ private: Fp::neg(y.a, x.a); Fp::neg(y.b, x.b); } - static void mul2C(Fp2T& y, const Fp2T& x) - { - Fp::mul2(y.a, x.a); - Fp::mul2(y.b, x.b); - } - static void mulC(Fp2T& z, const Fp2T& x, const Fp2T& y) + static void mulA(Unit *pz, const Unit *px, const Unit *py) { + Fp2T& z = *reinterpret_cast(pz); + const Fp2T& x = *reinterpret_cast(px); + const Fp2T& y = *reinterpret_cast(py); Fp2Dbl d; Fp2Dbl::mulPre(d, x, y); FpDbl::mod(z.a, d.a); FpDbl::mod(z.b, d.b); } + static void mul2C(Fp2T& y, const Fp2T& x) + { + Fp::mul2(y.a, x.a); + Fp::mul2(y.b, x.b); + } /* x = a + bi, i^2 = -1 y = x^2 = (a + bi)^2 = (a + b)(a - b) + 2abi @@ -622,7 +632,6 @@ private: }; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*Fp2T::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); template void (*Fp2T::sqr)(Fp2T& y, const Fp2T& x); template void (*Fp2T::mul2)(Fp2T& y, const Fp2T& x); #endif From 7d56a7fbf82b6a8ba6ecea9aa84e059f5c4c8b59 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 11:46:27 +0900 Subject: [PATCH 12/23] avoid cast of Fp2::sqr --- include/mcl/fp_tower.hpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index bdd0cdc..3bb7b97 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -253,13 +253,19 @@ public: Fp::op_.fp2_mulA_(z.a.v_, x.a.v_, y.a.v_); #else mulA(z.a.v_, x.a.v_, y.a.v_); +#endif + } + static void sqr(Fp2T& y, const Fp2T& x) + { +#ifdef MCL_XBYAK_DIRECT_CALL + Fp::op_.fp2_sqrA_(y.a.v_, x.a.v_); +#else + sqrA(y.a.v_, x.a.v_); #endif } #ifdef MCL_XBYAK_DIRECT_CALL - static void (*sqr)(Fp2T& y, const Fp2T& x); static void (*mul2)(Fp2T& y, const Fp2T& x); #else - static void sqr(Fp2T& y, const Fp2T& x) { sqrC(y, x); } static void mul2(Fp2T& y, const Fp2T& x) { mul2C(y, x); } #endif static void (*mul_xi)(Fp2T& y, const Fp2T& x); @@ -421,8 +427,9 @@ public: if (op.fp2_mulA_ == 0) { op.fp2_mulA_ = mulA; } - sqr = fp::func_ptr_cast(op.fp2_sqrA_); - if (sqr == 0) sqr = sqrC; + if (op.fp2_sqrA_ == 0) { + op.fp2_sqrA_ = sqrA; + } mul2 = fp::func_ptr_cast(op.fp2_mul2A_); if (mul2 == 0) mul2 = mul2C; mul_xi = fp::func_ptr_cast(op.fp2_mul_xiA_); @@ -556,8 +563,10 @@ private: x = a + bi, i^2 = -1 y = x^2 = (a + bi)^2 = (a + b)(a - b) + 2abi */ - static void sqrC(Fp2T& y, const Fp2T& x) + static void sqrA(Unit *py, const Unit *px) { + Fp2T& y = *reinterpret_cast(py); + const Fp2T& x = *reinterpret_cast(px); const Fp& a = x.a; const Fp& b = x.b; #if 1 // faster than using FpDbl @@ -632,7 +641,6 @@ private: }; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*Fp2T::sqr)(Fp2T& y, const Fp2T& x); template void (*Fp2T::mul2)(Fp2T& y, const Fp2T& x); #endif template void (*Fp2T::mul_xi)(Fp2T& y, const Fp2T& x); From 5f95d70767d6343c563f618a631bfe389eabf30a Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 11:55:15 +0900 Subject: [PATCH 13/23] avoid cast of Fp2::mul2 --- include/mcl/fp_tower.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 3bb7b97..7fa885b 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -263,11 +263,14 @@ public: sqrA(y.a.v_, x.a.v_); #endif } + static void mul2(Fp2T& y, const Fp2T& x) + { #ifdef MCL_XBYAK_DIRECT_CALL - static void (*mul2)(Fp2T& y, const Fp2T& x); + Fp::op_.fp2_mul2A_(y.a.v_, x.a.v_); #else - static void mul2(Fp2T& y, const Fp2T& x) { mul2C(y, x); } + mul2A(y.a.v_, x.a.v_); #endif + } static void (*mul_xi)(Fp2T& y, const Fp2T& x); static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); } static void inv(Fp2T& y, const Fp2T& x) { Fp::op_.fp2_inv(y.a.v_, x.a.v_); } @@ -430,8 +433,9 @@ public: if (op.fp2_sqrA_ == 0) { op.fp2_sqrA_ = sqrA; } - mul2 = fp::func_ptr_cast(op.fp2_mul2A_); - if (mul2 == 0) mul2 = mul2C; + if (op.fp2_mul2A_ == 0) { + op.fp2_mul2A_ = mul2A; + } mul_xi = fp::func_ptr_cast(op.fp2_mul_xiA_); #endif op.fp2_inv = fp2_invW; @@ -554,8 +558,10 @@ private: FpDbl::mod(z.a, d.a); FpDbl::mod(z.b, d.b); } - static void mul2C(Fp2T& y, const Fp2T& x) + static void mul2A(Unit *py, const Unit *px) { + Fp2T& y = *reinterpret_cast(py); + const Fp2T& x = *reinterpret_cast(px); Fp::mul2(y.a, x.a); Fp::mul2(y.b, x.b); } @@ -640,9 +646,6 @@ private: } }; -#ifdef MCL_XBYAK_DIRECT_CALL -template void (*Fp2T::mul2)(Fp2T& y, const Fp2T& x); -#endif template void (*Fp2T::mul_xi)(Fp2T& y, const Fp2T& x); template From 02a51c8077784f8e015b3c7215cde8da09c5fed2 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 12:18:07 +0900 Subject: [PATCH 14/23] avoid cast of Fp2::mul_xi --- include/mcl/fp_tower.hpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 7fa885b..f75b4c2 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -271,7 +271,10 @@ public: mul2A(y.a.v_, x.a.v_); #endif } - static void (*mul_xi)(Fp2T& y, const Fp2T& x); + static void mul_xi(Fp2T& y, const Fp2T& x) + { + Fp::op_.fp2_mul_xiA_(y.a.v_, x.a.v_); + } static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); } static void inv(Fp2T& y, const Fp2T& x) { Fp::op_.fp2_inv(y.a.v_, x.a.v_); } static void divBy2(Fp2T& y, const Fp2T& x) @@ -416,7 +419,6 @@ public: *pb = false; return; } - mul_xi = 0; #ifdef MCL_XBYAK_DIRECT_CALL if (op.fp2_addA_ == 0) { op.fp2_addA_ = addA; @@ -436,16 +438,15 @@ public: if (op.fp2_mul2A_ == 0) { op.fp2_mul2A_ = mul2A; } - mul_xi = fp::func_ptr_cast(op.fp2_mul_xiA_); #endif - op.fp2_inv = fp2_invW; - if (mul_xi == 0) { + if (op.fp2_mul_xiA_ == 0) { if (op.xi_a == 1) { - mul_xi = fp2_mul_xi_1_1iC; + op.fp2_mul_xiA_ = fp2_mul_xi_1_1iA; } else { - mul_xi = fp2_mul_xiC; + op.fp2_mul_xiA_ = fp2_mul_xiA; } } + op.fp2_inv = fp2_invW; FpDblT::init(); Fp2DblT::init(); // call init before Fp2::pow because FpDbl is used in Fp2T @@ -601,8 +602,10 @@ private: y = (a + bi)xi = (a + bi)(xi_a + i) =(a * x_ia - b) + (a + b xi_a)i */ - static void fp2_mul_xiC(Fp2T& y, const Fp2T& x) + static void fp2_mul_xiA(Unit *py, const Unit *px) { + Fp2T& y = *reinterpret_cast(py); + const Fp2T& x = *reinterpret_cast(px); const Fp& a = x.a; const Fp& b = x.b; Fp t; @@ -616,8 +619,10 @@ private: xi = 1 + i ; xi_a = 1 y = (a + bi)xi = (a - b) + (a + b)i */ - static void fp2_mul_xi_1_1iC(Fp2T& y, const Fp2T& x) + static void fp2_mul_xi_1_1iA(Unit *py, const Unit *px) { + Fp2T& y = *reinterpret_cast(py); + const Fp2T& x = *reinterpret_cast(px); const Fp& a = x.a; const Fp& b = x.b; Fp t; @@ -646,8 +651,6 @@ private: } }; -template void (*Fp2T::mul_xi)(Fp2T& y, const Fp2T& x); - template struct Fp2DblT { typedef FpDblT FpDbl; From f474245e874f386b279899afbee762290f91b49b Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 12:24:45 +0900 Subject: [PATCH 15/23] avoid cast in Fp2::inv --- include/mcl/fp_tower.hpp | 39 ++++++++++++++++++--------------------- include/mcl/op.hpp | 2 -- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index f75b4c2..247f7e9 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -275,8 +275,25 @@ public: { Fp::op_.fp2_mul_xiA_(y.a.v_, x.a.v_); } + /* + x = a + bi + 1 / x = (a - bi) / (a^2 + b^2) + */ + static void inv(Fp2T& y, const Fp2T& x) + { + assert(!x.isZero()); + const Fp& a = x.a; + const Fp& b = x.b; + Fp aa, bb; + Fp::sqr(aa, a); + Fp::sqr(bb, b); + aa += bb; + Fp::inv(aa, aa); // aa = 1 / (a^2 + b^2) + Fp::mul(y.a, a, aa); + Fp::mul(y.b, b, aa); + Fp::neg(y.b, y.b); + } static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); } - static void inv(Fp2T& y, const Fp2T& x) { Fp::op_.fp2_inv(y.a.v_, x.a.v_); } static void divBy2(Fp2T& y, const Fp2T& x) { Fp::divBy2(y.a, x.a); @@ -446,7 +463,6 @@ public: op.fp2_mul_xiA_ = fp2_mul_xiA; } } - op.fp2_inv = fp2_invW; FpDblT::init(); Fp2DblT::init(); // call init before Fp2::pow because FpDbl is used in Fp2T @@ -630,25 +646,6 @@ private: Fp::sub(y.a, a, b); y.b = t; } - /* - x = a + bi - 1 / x = (a - bi) / (a^2 + b^2) - */ - static void fp2_invW(Unit *y, const Unit *x) - { - const Fp *px = reinterpret_cast(x); - Fp *py = reinterpret_cast(y); - const Fp& a = px[0]; - const Fp& b = px[1]; - Fp aa, bb; - Fp::sqr(aa, a); - Fp::sqr(bb, b); - aa += bb; - Fp::inv(aa, aa); // aa = 1 / (a^2 + b^2) - Fp::mul(py[0], a, aa); - Fp::mul(py[1], b, aa); - Fp::neg(py[1], py[1]); - } }; template diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index b1085da..25d6bce 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -258,7 +258,6 @@ struct Op { */ int xi_a; // xi = xi_a + u void4u fp2_mulNF; - void2u fp2_inv; void2u fp2_mul_xiA_; uint32_t (*hash)(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize); @@ -345,7 +344,6 @@ struct Op { xi_a = 0; fp2_mulNF = 0; - fp2_inv = 0; fp2_mul_xiA_ = 0; hash = 0; From 125451649c6f7b219f316f383fffaddc9a2b10cf Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 12:34:51 +0900 Subject: [PATCH 16/23] use FpDbl in Fp2::inv --- include/mcl/fp_tower.hpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 247f7e9..1dea498 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -284,13 +284,15 @@ public: assert(!x.isZero()); const Fp& a = x.a; const Fp& b = x.b; - Fp aa, bb; - Fp::sqr(aa, a); - Fp::sqr(bb, b); - aa += bb; - Fp::inv(aa, aa); // aa = 1 / (a^2 + b^2) - Fp::mul(y.a, a, aa); - Fp::mul(y.b, b, aa); + FpDbl AA, BB; + FpDbl::sqrPre(AA, a); + FpDbl::sqrPre(BB, b); + FpDbl::addPre(AA, AA, BB); + Fp r; + FpDbl::mod(r, AA); + Fp::inv(r, r); // r = 1 / (a^2 + b^2) + Fp::mul(y.a, a, r); + Fp::mul(y.b, b, r); Fp::neg(y.b, y.b); } static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); } From e25fc2fa62767d5dd5d547b3da4321c0e4e5055f Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 14:30:39 +0900 Subject: [PATCH 17/23] refactor cast of Fp2T --- include/mcl/fp_tower.hpp | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 1dea498..b92e750 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -540,38 +540,40 @@ public: } #endif private: + static Fp2T& cast(Unit *x) { return *reinterpret_cast(x); } + static const Fp2T& cast(const Unit *x) { return *reinterpret_cast(x); } /* default Fp2T operator Fp2T = Fp[i]/(i^2 + 1) */ static void addA(Unit *pz, const Unit *px, const Unit *py) { - Fp2T& z = *reinterpret_cast(pz); - const Fp2T& x = *reinterpret_cast(px); - const Fp2T& y = *reinterpret_cast(py); + Fp2T& z = cast(pz); + const Fp2T& x = cast(px); + const Fp2T& y = cast(py); Fp::add(z.a, x.a, y.a); Fp::add(z.b, x.b, y.b); } static void subA(Unit *pz, const Unit *px, const Unit *py) { - Fp2T& z = *reinterpret_cast(pz); - const Fp2T& x = *reinterpret_cast(px); - const Fp2T& y = *reinterpret_cast(py); + Fp2T& z = cast(pz); + const Fp2T& x = cast(px); + const Fp2T& y = cast(py); Fp::sub(z.a, x.a, y.a); Fp::sub(z.b, x.b, y.b); } static void negA(Unit *py, const Unit *px) { - Fp2T& y = *reinterpret_cast(py); - const Fp2T& x = *reinterpret_cast(px); + Fp2T& y = cast(py); + const Fp2T& x = cast(px); Fp::neg(y.a, x.a); Fp::neg(y.b, x.b); } static void mulA(Unit *pz, const Unit *px, const Unit *py) { - Fp2T& z = *reinterpret_cast(pz); - const Fp2T& x = *reinterpret_cast(px); - const Fp2T& y = *reinterpret_cast(py); + Fp2T& z = cast(pz); + const Fp2T& x = cast(px); + const Fp2T& y = cast(py); Fp2Dbl d; Fp2Dbl::mulPre(d, x, y); FpDbl::mod(z.a, d.a); @@ -579,8 +581,8 @@ private: } static void mul2A(Unit *py, const Unit *px) { - Fp2T& y = *reinterpret_cast(py); - const Fp2T& x = *reinterpret_cast(px); + Fp2T& y = cast(py); + const Fp2T& x = cast(px); Fp::mul2(y.a, x.a); Fp::mul2(y.b, x.b); } @@ -590,8 +592,8 @@ private: */ static void sqrA(Unit *py, const Unit *px) { - Fp2T& y = *reinterpret_cast(py); - const Fp2T& x = *reinterpret_cast(px); + Fp2T& y = cast(py); + const Fp2T& x = cast(px); const Fp& a = x.a; const Fp& b = x.b; #if 1 // faster than using FpDbl @@ -622,8 +624,8 @@ private: */ static void fp2_mul_xiA(Unit *py, const Unit *px) { - Fp2T& y = *reinterpret_cast(py); - const Fp2T& x = *reinterpret_cast(px); + Fp2T& y = cast(py); + const Fp2T& x = cast(px); const Fp& a = x.a; const Fp& b = x.b; Fp t; @@ -639,8 +641,8 @@ private: */ static void fp2_mul_xi_1_1iA(Unit *py, const Unit *px) { - Fp2T& y = *reinterpret_cast(py); - const Fp2T& x = *reinterpret_cast(px); + Fp2T& y = cast(py); + const Fp2T& x = cast(px); const Fp& a = x.a; const Fp& b = x.b; Fp t; From 0f141988bd7b84d79362202b10e7998005d6fe20 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 15:13:03 +0900 Subject: [PATCH 18/23] tweak --- include/mcl/fp_tower.hpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index b92e750..13d922c 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -284,18 +284,18 @@ public: assert(!x.isZero()); const Fp& a = x.a; const Fp& b = x.b; - FpDbl AA, BB; - FpDbl::sqrPre(AA, a); - FpDbl::sqrPre(BB, b); - FpDbl::addPre(AA, AA, BB); Fp r; - FpDbl::mod(r, AA); + norm(r, x); Fp::inv(r, r); // r = 1 / (a^2 + b^2) Fp::mul(y.a, a, r); Fp::mul(y.b, b, r); Fp::neg(y.b, y.b); } - static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); } + static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) + { + Fp::addPre(z.a, x.a, y.a); + Fp::addPre(z.b, x.b, y.b); + } static void divBy2(Fp2T& y, const Fp2T& x) { Fp::divBy2(y.a, x.a); @@ -400,12 +400,14 @@ public: Fp::mul(y.b, x.b, t2); return true; } + // y = a^2 + b^2 static void inline norm(Fp& y, const Fp2T& x) { - Fp aa, bb; - Fp::sqr(aa, x.a); - Fp::sqr(bb, x.b); - Fp::add(y, aa, bb); + FpDbl AA, BB; + FpDbl::sqrPre(AA, x.a); + FpDbl::sqrPre(BB, x.b); + FpDbl::addPre(AA, AA, BB); + FpDbl::mod(y, AA); } /* Frobenius From de21f2ea4ce48455bfa025fb67a32242aec953cc Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 15:43:25 +0900 Subject: [PATCH 19/23] avoid cast of Fp2Dbl::mulPre --- include/mcl/fp_tower.hpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 13d922c..d7ab91b 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -656,6 +656,7 @@ private: template struct Fp2DblT { + typedef Fp2DblT Fp2Dbl; typedef FpDblT FpDbl; typedef Fp2T Fp2; typedef fp::Unit Unit; @@ -711,11 +712,13 @@ struct Fp2DblT { FpDbl::add(y.b, y.b, x.a); y.a = t; } - static void (*mulPre)(Fp2DblT&, const Fp2&, const Fp2&); + static void mulPre(Fp2DblT& z, const Fp2& x, const Fp2& y) + { + Fp::getOp().fp2Dbl_mulPreA_(z.a.v_, x.getUnit(), y.getUnit()); + } static void sqrPre(Fp2DblT& y, const Fp2& x) { - const mcl::fp::Op& op = Fp::getOp(); - op.fp2Dbl_sqrPreA_(y.a.v_, x.getUnit()); + Fp::getOp().fp2Dbl_sqrPreA_(y.a.v_, x.getUnit()); } static void (*mul_xi)(Fp2DblT&, const Fp2DblT&); static void mod(Fp2& y, const Fp2DblT& x) @@ -735,13 +738,11 @@ struct Fp2DblT { { assert(!Fp::getOp().isFullBit); mcl::fp::Op& op = Fp::getOpNonConst(); - if (op.fp2Dbl_mulPreA_) { - mulPre = fp::func_ptr_cast(op.fp2Dbl_mulPreA_); - } else { - mulPre = fp2Dbl_mulPreW; + if (op.fp2Dbl_mulPreA_ == 0) { + op.fp2Dbl_mulPreA_ = mulPreA; } if (op.fp2Dbl_sqrPreA_ == 0) { - op.fp2Dbl_sqrPreA_ = fp2Dbl_sqrPreC; + op.fp2Dbl_sqrPreA_ = sqrPreA; } const uint32_t xi_a = Fp2::get_xi_a(); switch (xi_a) { @@ -756,12 +757,19 @@ struct Fp2DblT { break; } } +private: + static Fp2 cast(Unit *x) { return *reinterpret_cast(x); } + static const Fp2 cast(const Unit *x) { return *reinterpret_cast(x); } + static Fp2Dbl& castD(Unit *x) { return *reinterpret_cast(x); } /* Fp2Dbl::mulPre by FpDblT @note mod of NIST_P192 is fast */ - static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y) + static void mulPreA(Unit *pz, const Unit *px, const Unit *py) { + Fp2Dbl& z = castD(pz); + const Fp2& x = cast(px); + const Fp2& y = cast(py); assert(!Fp::getOp().isFullBit); const Fp& a = x.a; const Fp& b = x.b; @@ -780,11 +788,11 @@ struct Fp2DblT { FpDbl::subPre(d1, d1, d2); FpDbl::sub(d0, d0, d2); // ac - bd } - static void fp2Dbl_sqrPreC(Unit *py, const Unit *px) + static void sqrPreA(Unit *py, const Unit *px) { assert(!Fp::getOp().isFullBit); - const Fp2& x = *reinterpret_cast(px); - Fp2DblT& y = *reinterpret_cast(py); + Fp2Dbl& y = castD(py); + const Fp2& x = cast(px); Fp t1, t2; Fp::addPre(t1, x.b, x.b); // 2b Fp::addPre(t2, x.a, x.b); // a + b @@ -794,7 +802,6 @@ struct Fp2DblT { } }; -template void (*Fp2DblT::mulPre)(Fp2DblT&, const Fp2T&, const Fp2T&); template void (*Fp2DblT::mul_xi)(Fp2DblT&, const Fp2DblT&); template Fp2T Fp2T::g[Fp2T::gN]; From 8f195134f7ba1cbbfcedcf69739d0252a02e0361 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 15:52:26 +0900 Subject: [PATCH 20/23] avoid cast in Fp2Dbl::mul_xi --- include/mcl/fp_tower.hpp | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index d7ab91b..d4536a9 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -695,16 +695,20 @@ struct Fp2DblT { FpDbl::neg(y.a, x.a); FpDbl::neg(y.b, x.b); } - static void mul_xi_1C(Fp2DblT& y, const Fp2DblT& x) + static void mul_xi_1A(Unit *py, const Unit *px) { + Fp2Dbl& y = castD(py); + const Fp2Dbl& x = castD(px); FpDbl t; FpDbl::add(t, x.a, x.b); FpDbl::sub(y.a, x.a, x.b); y.b = t; } - static void mul_xi_genericC(Fp2DblT& y, const Fp2DblT& x) + static void mul_xi_genericA(Unit *py, const Unit *px) { const uint32_t xi_a = Fp2::get_xi_a(); + Fp2Dbl& y = castD(py); + const Fp2Dbl& x = castD(px); FpDbl t; FpDbl::mulUnit(t, x.a, xi_a); FpDbl::sub(t, t, x.b); @@ -720,7 +724,10 @@ struct Fp2DblT { { Fp::getOp().fp2Dbl_sqrPreA_(y.a.v_, x.getUnit()); } - static void (*mul_xi)(Fp2DblT&, const Fp2DblT&); + static void mul_xi(Fp2DblT& y, const Fp2DblT& x) + { + Fp::getOp().fp2Dbl_mul_xiA_(y.a.v_, x.a.getUnit()); + } static void mod(Fp2& y, const Fp2DblT& x) { FpDbl::mod(y.a, x.a); @@ -744,23 +751,20 @@ struct Fp2DblT { if (op.fp2Dbl_sqrPreA_ == 0) { op.fp2Dbl_sqrPreA_ = sqrPreA; } - const uint32_t xi_a = Fp2::get_xi_a(); - switch (xi_a) { - case 1: - mul_xi = mul_xi_1C; - if (op.fp2Dbl_mul_xiA_) { - mul_xi = fp::func_ptr_cast(op.fp2Dbl_mul_xiA_); + if (op.fp2Dbl_mul_xiA_ == 0) { + const uint32_t xi_a = Fp2::get_xi_a(); + if (xi_a == 1) { + op.fp2Dbl_mul_xiA_ = mul_xi_1A; + } else { + op.fp2Dbl_mul_xiA_ = mul_xi_genericA; } - break; - default: - mul_xi = mul_xi_genericC; - break; } } private: static Fp2 cast(Unit *x) { return *reinterpret_cast(x); } static const Fp2 cast(const Unit *x) { return *reinterpret_cast(x); } static Fp2Dbl& castD(Unit *x) { return *reinterpret_cast(x); } + static const Fp2Dbl& castD(const Unit *x) { return *reinterpret_cast(x); } /* Fp2Dbl::mulPre by FpDblT @note mod of NIST_P192 is fast @@ -802,8 +806,6 @@ private: } }; -template void (*Fp2DblT::mul_xi)(Fp2DblT&, const Fp2DblT&); - template Fp2T Fp2T::g[Fp2T::gN]; template Fp2T Fp2T::g2[Fp2T::gN]; template Fp2T Fp2T::g3[Fp2T::gN]; From 3be037a85375d4f1ad71d1ab0d3a197d06899fbf Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 16:30:07 +0900 Subject: [PATCH 21/23] tweak --- include/mcl/fp_tower.hpp | 42 ++++++++++++++++++++-------------------- test/common_test.hpp | 12 +++++++++--- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index d4536a9..a2cf930 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -695,27 +695,6 @@ struct Fp2DblT { FpDbl::neg(y.a, x.a); FpDbl::neg(y.b, x.b); } - static void mul_xi_1A(Unit *py, const Unit *px) - { - Fp2Dbl& y = castD(py); - const Fp2Dbl& x = castD(px); - FpDbl t; - FpDbl::add(t, x.a, x.b); - FpDbl::sub(y.a, x.a, x.b); - y.b = t; - } - static void mul_xi_genericA(Unit *py, const Unit *px) - { - const uint32_t xi_a = Fp2::get_xi_a(); - Fp2Dbl& y = castD(py); - const Fp2Dbl& x = castD(px); - FpDbl t; - FpDbl::mulUnit(t, x.a, xi_a); - FpDbl::sub(t, t, x.b); - FpDbl::mulUnit(y.b, x.b, xi_a); - FpDbl::add(y.b, y.b, x.a); - y.a = t; - } static void mulPre(Fp2DblT& z, const Fp2& x, const Fp2& y) { Fp::getOp().fp2Dbl_mulPreA_(z.a.v_, x.getUnit(), y.getUnit()); @@ -804,6 +783,27 @@ private: Fp::sub(t1, x.a, x.b); // a - b FpDbl::mulPre(y.a, t1, t2); // (a + b)(a - b) } + static void mul_xi_1A(Unit *py, const Unit *px) + { + Fp2Dbl& y = castD(py); + const Fp2Dbl& x = castD(px); + FpDbl t; + FpDbl::add(t, x.a, x.b); + FpDbl::sub(y.a, x.a, x.b); + y.b = t; + } + static void mul_xi_genericA(Unit *py, const Unit *px) + { + const uint32_t xi_a = Fp2::get_xi_a(); + Fp2Dbl& y = castD(py); + const Fp2Dbl& x = castD(px); + FpDbl t; + FpDbl::mulUnit(t, x.a, xi_a); + FpDbl::sub(t, t, x.b); + FpDbl::mulUnit(y.b, x.b, xi_a); + FpDbl::add(y.b, y.b, x.a); + y.a = t; + } }; template Fp2T Fp2T::g[Fp2T::gN]; diff --git a/test/common_test.hpp b/test/common_test.hpp index 74a745c..5deb9f1 100644 --- a/test/common_test.hpp +++ b/test/common_test.hpp @@ -163,10 +163,11 @@ void testABCD() void testFp2Dbl_mul_xi1() { - if (Fp2::get_xi_a() != 1) return; + const uint32_t xi_a = Fp2::get_xi_a(); + if (xi_a != 1) return; puts("testFp2Dbl_mul_xi1"); cybozu::XorShift rg; - for (int i = 0; i < 100; i++) { + for (int i = 0; i < 10; i++) { Fp a1, a2; a1.setByCSPRNG(rg); a2.setByCSPRNG(rg); @@ -176,7 +177,12 @@ void testFp2Dbl_mul_xi1() a2.setByCSPRNG(rg); FpDbl::mulPre(x.b, a1, a2); Fp2Dbl ok; - Fp2Dbl::mul_xi_1C(ok, x); + { + FpDbl::mulUnit(ok.a, x.a, xi_a); + ok.a -= x.b; + FpDbl::mulUnit(ok.b, x.b, xi_a); + ok.b += x.a; + } Fp2Dbl::mul_xi(x, x); CYBOZU_TEST_EQUAL_ARRAY(ok.a.getUnit(), x.a.getUnit(), ok.a.getUnitSize()); CYBOZU_TEST_EQUAL_ARRAY(ok.b.getUnit(), x.b.getUnit(), ok.b.getUnitSize()); From ead99eb002ea98eeb424f134431d50f5005a3ed6 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 16:38:34 +0900 Subject: [PATCH 22/23] avoid cast of Fp::add --- include/mcl/fp.hpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index f41d4f8..f33e905 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -146,8 +146,9 @@ public: ioMode_ = 0; isETHserialization_ = false; #ifdef MCL_XBYAK_DIRECT_CALL - add = fp::func_ptr_cast(op_.fp_addA_); - if (add == 0) add = addC; + if (op_.fp_addA_ == 0) { + op_.fp_addA_ = addA; + } sub = fp::func_ptr_cast(op_.fp_subA_); if (sub == 0) sub = subC; neg = fp::func_ptr_cast(op_.fp_negA_); @@ -518,9 +519,21 @@ public: } setArray(pb, gmp::getUnit(x), gmp::getUnitSize(x)); } + static void add(FpT& z, const FpT& x, const FpT& y) + { +#ifdef MCL_XBYAK_DIRECT_CALL + op_.fp_addA_(z.v_, x.v_, y.v_); +#else + op_.fp_add(z.v_, x.v_, y.v_, op_.p); +#endif + } +#ifdef MCL_XBYAK_DIRECT_CALL + static inline void addA(Unit *z, const Unit *x, const Unit *y) + { + op_.fp_add(z, x, y, op_.p); + } +#endif #ifdef MCL_XBYAK_DIRECT_CALL - static void (*add)(FpT& z, const FpT& x, const FpT& y); - static inline void addC(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); } static void (*sub)(FpT& z, const FpT& x, const FpT& y); static inline void subC(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); } static void (*neg)(FpT& y, const FpT& x); @@ -534,7 +547,6 @@ public: static void (*mul9)(FpT& y, const FpT& x); static inline void mul9C(FpT& y, const FpT& x) { mulSmall(y, x, 9); } #else - static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); } static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); } static inline void neg(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_, op_.p); } static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); } @@ -789,7 +801,6 @@ template FpT FpT template int FpT::ioMode_ = IoAuto; template bool FpT::isETHserialization_ = false; #ifdef MCL_XBYAK_DIRECT_CALL -template void (*FpT::add)(FpT& z, const FpT& x, const FpT& y); template void (*FpT::sub)(FpT& z, const FpT& x, const FpT& y); template void (*FpT::neg)(FpT& y, const FpT& x); template void (*FpT::mul)(FpT& z, const FpT& x, const FpT& y); From 86dd59e3c3faf532d58fd9e61168eedb1ba9005a Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 10 May 2021 17:16:40 +0900 Subject: [PATCH 23/23] avoid cast of Fp::sub/neg/mul/sqr/mul2/mul9 --- include/mcl/fp.hpp | 148 +++++++++++++++++++++++++++++++-------------- 1 file changed, 101 insertions(+), 47 deletions(-) diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index f33e905..7cf258e 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -100,6 +100,47 @@ private: template friend class FpDblT; template friend class Fp2T; template friend struct Fp6T; +#ifdef MCL_XBYAK_DIRECT_CALL + static inline void addA(Unit *z, const Unit *x, const Unit *y) + { + op_.fp_add(z, x, y, op_.p); + } + static inline void subA(Unit *z, const Unit *x, const Unit *y) + { + op_.fp_sub(z, x, y, op_.p); + } + static inline void negA(Unit *y, const Unit *x) + { + op_.fp_neg(y, x, op_.p); + } + static inline void mulA(Unit *z, const Unit *x, const Unit *y) + { + op_.fp_mul(z, x, y, op_.p); + } + static inline void sqrA(Unit *y, const Unit *x) + { + op_.fp_sqr(y, x, op_.p); + } + static inline void mul2A(Unit *y, const Unit *x) + { + op_.fp_mul2(y, x, op_.p); + } +#endif + static inline void mul9A(Unit *y, const Unit *x) + { + mulSmall(y, x, 9); +// op_.fp_mul9(y, x, op_.p); + } + static inline void mulSmall(Unit *z, const Unit *x, const uint32_t y) + { + assert(y <= op_.smallModp.maxMulN); + Unit xy[maxSize + 1]; + op_.fp_mulUnitPre(xy, x, y); + int v = op_.smallModp.approxMul(xy); + const Unit *pv = op_.smallModp.getPmul(v); + op_.fp_subPre(z, xy, pv); + op_.fp_sub(z, z, op_.p, op_.p); + } public: typedef FpT BaseFp; // return pointer to array v_[] @@ -149,18 +190,24 @@ public: if (op_.fp_addA_ == 0) { op_.fp_addA_ = addA; } - sub = fp::func_ptr_cast(op_.fp_subA_); - if (sub == 0) sub = subC; - neg = fp::func_ptr_cast(op_.fp_negA_); - if (neg == 0) neg = negC; - mul = fp::func_ptr_cast(op_.fp_mulA_); - if (mul == 0) mul = mulC; - sqr = fp::func_ptr_cast(op_.fp_sqrA_); - if (sqr == 0) sqr = sqrC; - mul2 = fp::func_ptr_cast(op_.fp_mul2A_); - if (mul2 == 0) mul2 = mul2C; - mul9 = fp::func_ptr_cast(op_.fp_mul9A_); - if (mul9 == 0) mul9 = mul9C; + if (op_.fp_subA_ == 0) { + op_.fp_subA_ = subA; + } + if (op_.fp_negA_ == 0) { + op_.fp_negA_ = negA; + } + if (op_.fp_mulA_ == 0) { + op_.fp_mulA_ = mulA; + } + if (op_.fp_sqrA_ == 0) { + op_.fp_sqrA_ = sqrA; + } + if (op_.fp_mul2A_ == 0) { + op_.fp_mul2A_ = mul2A; + } + if (op_.fp_mul9A_ == 0) { + op_.fp_mul9A_ = mul9A; + } #endif *pb = true; } @@ -527,44 +574,59 @@ public: op_.fp_add(z.v_, x.v_, y.v_, op_.p); #endif } + static void sub(FpT& z, const FpT& x, const FpT& y) + { #ifdef MCL_XBYAK_DIRECT_CALL - static inline void addA(Unit *z, const Unit *x, const Unit *y) + op_.fp_subA_(z.v_, x.v_, y.v_); +#else + op_.fp_sub(z.v_, x.v_, y.v_, op_.p); +#endif + } + static void neg(FpT& y, const FpT& x) { - op_.fp_add(z, x, y, op_.p); +#ifdef MCL_XBYAK_DIRECT_CALL + op_.fp_negA_(y.v_, x.v_); +#else + op_.fp_neg(y.v_, x.v_, op_.p); +#endif + } + static void mul(FpT& z, const FpT& x, const FpT& y) + { +#ifdef MCL_XBYAK_DIRECT_CALL + op_.fp_mulA_(z.v_, x.v_, y.v_); +#else + op_.fp_mul(z.v_, x.v_, y.v_, op_.p); +#endif + } + static void sqr(FpT& y, const FpT& x) + { +#ifdef MCL_XBYAK_DIRECT_CALL + op_.fp_sqrA_(y.v_, x.v_); +#else + op_.fp_sqr(y.v_, x.v_, op_.p); +#endif } + static void mul2(FpT& y, const FpT& x) + { +#ifdef MCL_XBYAK_DIRECT_CALL + op_.fp_mul2A_(y.v_, x.v_); +#else + op_.fp_mul2(y.v_, x.v_, op_.p); #endif + } + static void mul9(FpT& y, const FpT& x) + { #ifdef MCL_XBYAK_DIRECT_CALL - static void (*sub)(FpT& z, const FpT& x, const FpT& y); - static inline void subC(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); } - static void (*neg)(FpT& y, const FpT& x); - static inline void negC(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_, op_.p); } - static void (*mul)(FpT& z, const FpT& x, const FpT& y); - static inline void mulC(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); } - static void (*sqr)(FpT& y, const FpT& x); - static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); } - static void (*mul2)(FpT& y, const FpT& x); - static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); } - static void (*mul9)(FpT& y, const FpT& x); - static inline void mul9C(FpT& y, const FpT& x) { mulSmall(y, x, 9); } + op_.fp_mul9A_(y.v_, x.v_); #else - static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); } - static inline void neg(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_, op_.p); } - static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); } - static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); } - static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); } - static inline void mul9(FpT& y, const FpT& x) { mulSmall(y, x, 9); } + mul9A(y.v_, x.v_); #endif + } static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); } static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); } static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) { - assert(y <= op_.smallModp.maxMulN); - Unit xy[maxSize + 1]; - op_.fp_mulUnitPre(xy, x.v_, y); - int v = op_.smallModp.approxMul(xy); - const Unit *pv = op_.smallModp.getPmul(v); - op_.fp_subPre(z.v_, xy, pv); - op_.fp_sub(z.v_, z.v_, op_.p, op_.p); + mulSmall(z.v_, x.v_, y); } static inline void mulUnit(FpT& z, const FpT& x, const Unit y) { @@ -800,14 +862,6 @@ template fp::Op FpT::op_; template FpT FpT::inv2_; template int FpT::ioMode_ = IoAuto; template bool FpT::isETHserialization_ = false; -#ifdef MCL_XBYAK_DIRECT_CALL -template void (*FpT::sub)(FpT& z, const FpT& x, const FpT& y); -template void (*FpT::neg)(FpT& y, const FpT& x); -template void (*FpT::mul)(FpT& z, const FpT& x, const FpT& y); -template void (*FpT::sqr)(FpT& y, const FpT& x); -template void (*FpT::mul2)(FpT& y, const FpT& x); -template void (*FpT::mul9)(FpT& y, const FpT& x); -#endif } // mcl