From 40343a4a693025049328ef56630500e26ea37c31 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 11 Feb 2021 19:49:37 +0900 Subject: [PATCH] add mul2 --- include/mcl/op.hpp | 4 ++-- src/fp_generator.hpp | 31 +++++++++++++++++++++++++++++++ src/fp_static_code.hpp | 4 ++++ test/bench.hpp | 2 ++ 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 8753f76..ea5c379 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -232,12 +232,12 @@ struct Op { void1u fp_clear; void2u fp_copy; void2u fp_shr1; - void3u fp_mul2; void3u fp_neg; void4u fp_add; void4u fp_sub; void4u fp_mul; void3u fp_sqr; + void3u fp_mul2; void2uOp fp_invOp; void2uIu fp_mulUnit; // fpN1_mod + fp_mulUnitPre @@ -319,12 +319,12 @@ struct Op { fp_clear = 0; fp_copy = 0; fp_shr1 = 0; - fp_mul2 = 0; fp_neg = 0; fp_add = 0; fp_sub = 0; fp_mul = 0; fp_sqr = 0; + fp_mul2 = 0; fp_invOp = 0; fp_mulUnit = 0; diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index aaaaa4a..684ecb3 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -379,6 +379,10 @@ private: op.fp_shr1 = gen_shr1(); setFuncInfo(prof_, suf, "_shr1", op.fp_shr1, getCurr()); + align(16); + op.fp_mul2A_ = gen_mul2(); + setFuncInfo(prof_, suf, "_mul2", op.fp_mul2A_, getCurr()); + align(16); op.fp_negA_ = gen_fp_neg(); setFuncInfo(prof_, suf, "_neg", op.fp_negA_, getCurr()); @@ -915,6 +919,33 @@ private: mov(ptr [pz + (pn_ - 1) * 8], *t0); return func; } + void2u gen_mul2() + { + if (isFullBit_) return 0; + if (!(pn_ == 4 || pn_ == 6)) return 0; + void2u func = getCurr(); + const int n = pn_ * 2 - 2; + StackFrame sf(this, 2, n); + Pack x = sf.t.sub(0, pn_); + load_rm(x, sf.p[1]); +#if 0 + add_rr(x, x); +#else + for (int i = pn_ - 1; i > 0; i--) { + shld(x[i], x[i - 1], 1); + } + shl(x[0], 1); +#endif + Pack t = sf.t.sub(pn_, n - pn_); + t.append(sf.p[1]); + t.append(rax); // destroy last + mov_rr(t, x); + lea(rax, ptr[rip + pL_]); + sub_rm(t, rax); + cmovc_rr(t, x); + store_mr(sf.p[0], t); + return func; + } void3u gen_mul() { void3u func = getCurr(); diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp index 7421f0a..705e46e 100644 --- a/src/fp_static_code.hpp +++ b/src/fp_static_code.hpp @@ -23,6 +23,7 @@ void mclx_Fp_neg(Unit*, const Unit*); void mclx_FpDbl_mod(Unit*, const Unit*); void mclx_Fp_mul(Unit*, const Unit*, const Unit*); void mclx_Fp_sqr(Unit*, const Unit*); +void mclx_Fp_mul2(Unit*, const Unit*); void mclx_FpDbl_add(Unit*, const Unit*, const Unit*); void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*); int mclx_Fp_preInv(Unit*, const Unit*); @@ -45,6 +46,7 @@ void mclx_Fr_shr1(Unit*, const Unit*); void mclx_Fr_neg(Unit*, const Unit*); void mclx_Fr_mul(Unit*, const Unit*, const Unit*); void mclx_Fr_sqr(Unit*, const Unit*); +void mclx_Fr_mul2(Unit*, const Unit*); int mclx_Fr_preInv(Unit*, const Unit*); } // extern "C" @@ -61,6 +63,7 @@ void setStaticCode(mcl::fp::Op& op) op.fpDbl_modA_ = mclx_FpDbl_mod; op.fp_mulA_ = mclx_Fp_mul; op.fp_sqrA_ = mclx_Fp_sqr; + op.fp_mul2A_ = mclx_Fp_mul2; op.fpDbl_addA_ = mclx_FpDbl_add; op.fpDbl_subA_ = mclx_FpDbl_sub; op.fpDbl_addPre = mclx_FpDbl_addPre; @@ -85,6 +88,7 @@ void setStaticCode(mcl::fp::Op& op) op.fp_negA_ = mclx_Fr_neg; op.fp_mulA_ = mclx_Fr_mul; op.fp_sqrA_ = mclx_Fr_sqr; + op.fp_mul2A_ = mclx_Fr_mul2; op.fp_preInv = mclx_Fr_preInv; } op.fp_mul = fp::func_ptr_cast(op.fp_mulA_); diff --git a/test/bench.hpp b/test/bench.hpp index 094d082..7359181 100644 --- a/test/bench.hpp +++ b/test/bench.hpp @@ -116,6 +116,8 @@ void testBench(const G1& P, const G2& Q) CYBOZU_BENCH_C("Fr::add ", C3, Fr::add, a, a, b); CYBOZU_BENCH_C("Fr::sub ", C3, Fr::sub, a, a, b); CYBOZU_BENCH_C("Fr::neg ", C3, Fr::neg, a, a); + CYBOZU_BENCH_C("Fr::add 2 ", C3, Fr::add, a, a, b); + CYBOZU_BENCH_C("Fr::mul2 ", C3, Fr::mul2, a, a); CYBOZU_BENCH_C("Fr::mul ", C3, Fr::mul, a, a, b); CYBOZU_BENCH_C("Fr::sqr ", C3, Fr::sqr, a, a); CYBOZU_BENCH_C("Fr::inv ", C3, invAdd, a, a, b);