Fp::inv uses generated code

update-fork
MITSUNARI Shigeo 4 years ago
parent 02cce0ed47
commit 512a0abd44
  1. 2
      include/mcl/op.hpp
  2. 28
      src/fp.cpp
  3. 4
      src/fp_generator.hpp
  4. 2
      src/fp_static_code.hpp

@ -14,7 +14,7 @@
#define MCL_DONT_USE_XBYAK #define MCL_DONT_USE_XBYAK
#define MCL_DONT_USE_OPENSSL #define MCL_DONT_USE_OPENSSL
#endif #endif
#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8) #if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8) && !defined(MCL_STATIC_CODE)
#define MCL_USE_XBYAK #define MCL_USE_XBYAK
#endif #endif
#if defined(MCL_USE_XBYAK) || defined(MCL_STATIC_CODE) #if defined(MCL_USE_XBYAK) || defined(MCL_STATIC_CODE)

@ -377,29 +377,31 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
} }
op.rp = getMontgomeryCoeff(p[0]); op.rp = getMontgomeryCoeff(p[0]);
if (mode != FP_XBYAK) return true; if (mode != FP_XBYAK) return true;
#ifdef MCL_USE_VINT
const int maxInvN = 6;
#else
const int maxInvN = 4;
#endif
#ifdef MCL_X64_ASM
#ifdef MCL_USE_XBYAK #ifdef MCL_USE_XBYAK
if (op.fg == 0) op.fg = Op::createFpGenerator(); if (op.fg == 0) op.fg = Op::createFpGenerator();
bool useXbyak = op.fg->init(op, g_cpu); bool enableInv = op.fg->init(op, g_cpu);
#ifdef MCL_DUMP_JIT #ifdef MCL_DUMP_JIT
return true; return true;
#endif #endif
#ifdef MCL_USE_VINT
const int maxN = 6;
#else
const int maxN = 4;
#endif
if (useXbyak && op.isMont && N <= maxN) {
op.fp_invOp = &invOpForMontC;
initInvTbl(op);
}
#elif defined(MCL_STATIC_CODE) #elif defined(MCL_STATIC_CODE)
fp::setStaticCode(op); fp::setStaticCode(op);
if (op.isMont && N <= 4) { bool enableInv = true;
#endif // MCL_USE_XBYAK
if (enableInv && op.isMont && N <= maxInvN) {
op.fp_invOp = &invOpForMontC; op.fp_invOp = &invOpForMontC;
initInvTbl(op); initInvTbl(op);
} }
#endif #endif // MCL_X64_ASM
return true; return true;
} }

@ -393,7 +393,7 @@ private:
op.fp_sqrA_ = gen_sqr(); op.fp_sqrA_ = gen_sqr();
setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr()); setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());
if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4 if (op.primeMode != PM_NIST_P192 && op.N <= 6) { // support general op.N but not fast for op.N > 4
align(16); align(16);
op.fp_preInv = getCurr<int2u>(); op.fp_preInv = getCurr<int2u>();
gen_preInv(); gen_preInv();
@ -2676,7 +2676,7 @@ private:
*/ */
void gen_preInv() void gen_preInv()
{ {
assert(1 <= pn_ && pn_ <= 4); assert(1 <= pn_ && pn_ <= 6);
const int freeRegNum = 13; const int freeRegNum = 13;
StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max<int>(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8); StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max<int>(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8);
const Reg64& pr = sf.p[0]; const Reg64& pr = sf.p[0];

@ -25,6 +25,7 @@ void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
void mclx_Fp_sqr(Unit*, const Unit*); void mclx_Fp_sqr(Unit*, const Unit*);
void mclx_FpDbl_add(Unit*, const Unit*, const Unit*); void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*); void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
int mclx_Fp_preInv(Unit*, const Unit*);
Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*); Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*);
Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*); Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*); void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
@ -73,6 +74,7 @@ void setStaticCode(mcl::fp::Op& op)
op.fp2_mulA_ = mclx_Fp2_mul; op.fp2_mulA_ = mclx_Fp2_mul;
op.fp2_sqrA_ = mclx_Fp2_sqr; op.fp2_sqrA_ = mclx_Fp2_sqr;
op.fp2_mul_xiA_ = mclx_Fp2_mul_xi; op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
op.fp_preInv = mclx_Fp_preInv;
} else { } else {
// Fr, sizeof(Fr) = 32 // Fr, sizeof(Fr) = 32
op.fp_addPre = mclx_Fr_addPre; op.fp_addPre = mclx_Fr_addPre;

Loading…
Cancel
Save