From 512a0abd44ab9123f7bdebaff87587e839af3a2d Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 1 Oct 2020 12:23:17 +0900 Subject: [PATCH] Fp::inv uses generated code --- include/mcl/op.hpp | 2 +- src/fp.cpp | 28 +++++++++++++++------------- src/fp_generator.hpp | 4 ++-- src/fp_static_code.hpp | 2 ++ 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 22a78b1..e8f572f 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -14,7 +14,7 @@ #define MCL_DONT_USE_XBYAK #define MCL_DONT_USE_OPENSSL #endif -#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8) +#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8) && !defined(MCL_STATIC_CODE) #define MCL_USE_XBYAK #endif #if defined(MCL_USE_XBYAK) || defined(MCL_STATIC_CODE) diff --git a/src/fp.cpp b/src/fp.cpp index d46eb6e..442ae43 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -377,29 +377,31 @@ static bool initForMont(Op& op, const Unit *p, Mode mode) } op.rp = getMontgomeryCoeff(p[0]); if (mode != FP_XBYAK) return true; + +#ifdef MCL_USE_VINT + const int maxInvN = 6; +#else + const int maxInvN = 4; +#endif + +#ifdef MCL_X64_ASM + #ifdef MCL_USE_XBYAK if (op.fg == 0) op.fg = Op::createFpGenerator(); - bool useXbyak = op.fg->init(op, g_cpu); + bool enableInv = op.fg->init(op, g_cpu); #ifdef MCL_DUMP_JIT return true; #endif -#ifdef MCL_USE_VINT - const int maxN = 6; -#else - const int maxN = 4; -#endif - - if (useXbyak && op.isMont && N <= maxN) { - op.fp_invOp = &invOpForMontC; - initInvTbl(op); - } #elif defined(MCL_STATIC_CODE) fp::setStaticCode(op); - if (op.isMont && N <= 4) { + bool enableInv = true; +#endif // MCL_USE_XBYAK + + if (enableInv && op.isMont && N <= maxInvN) { op.fp_invOp = &invOpForMontC; initInvTbl(op); } -#endif +#endif // MCL_X64_ASM return true; } diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index 34a6ded..8002a9a 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -393,7 +393,7 @@ private: op.fp_sqrA_ = gen_sqr(); setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr()); - if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4 + if (op.primeMode != PM_NIST_P192 && op.N <= 6) { // support general op.N but not fast for op.N > 4 align(16); op.fp_preInv = getCurr(); gen_preInv(); @@ -2676,7 +2676,7 @@ private: */ void gen_preInv() { - assert(1 <= pn_ && pn_ <= 4); + assert(1 <= pn_ && pn_ <= 6); const int freeRegNum = 13; StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8); const Reg64& pr = sf.p[0]; diff --git a/src/fp_static_code.hpp b/src/fp_static_code.hpp index 832062e..09d4d01 100644 --- a/src/fp_static_code.hpp +++ b/src/fp_static_code.hpp @@ -25,6 +25,7 @@ void mclx_Fp_mul(Unit*, const Unit*, const Unit*); void mclx_Fp_sqr(Unit*, const Unit*); void mclx_FpDbl_add(Unit*, const Unit*, const Unit*); void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*); +int mclx_Fp_preInv(Unit*, const Unit*); Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*); Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*); void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*); @@ -73,6 +74,7 @@ void setStaticCode(mcl::fp::Op& op) op.fp2_mulA_ = mclx_Fp2_mul; op.fp2_sqrA_ = mclx_Fp2_sqr; op.fp2_mul_xiA_ = mclx_Fp2_mul_xi; + op.fp_preInv = mclx_Fp_preInv; } else { // Fr, sizeof(Fr) = 32 op.fp_addPre = mclx_Fr_addPre;