Fp::inv uses generated code

update-fork
MITSUNARI Shigeo 4 years ago
parent 02cce0ed47
commit 512a0abd44
  Changed files (number of changed lines):
  1. include/mcl/op.hpp (2)
  2. src/fp.cpp (28)
  3. src/fp_generator.hpp (4)
  4. src/fp_static_code.hpp (2)

@ -14,7 +14,7 @@
#define MCL_DONT_USE_XBYAK
#define MCL_DONT_USE_OPENSSL
#endif
#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8)
#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8) && !defined(MCL_STATIC_CODE)
#define MCL_USE_XBYAK
#endif
#if defined(MCL_USE_XBYAK) || defined(MCL_STATIC_CODE)

@ -377,29 +377,31 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
}
op.rp = getMontgomeryCoeff(p[0]);
if (mode != FP_XBYAK) return true;
#ifdef MCL_USE_VINT
const int maxInvN = 6;
#else
const int maxInvN = 4;
#endif
#ifdef MCL_X64_ASM
#ifdef MCL_USE_XBYAK
if (op.fg == 0) op.fg = Op::createFpGenerator();
bool useXbyak = op.fg->init(op, g_cpu);
bool enableInv = op.fg->init(op, g_cpu);
#ifdef MCL_DUMP_JIT
return true;
#endif
#ifdef MCL_USE_VINT
const int maxN = 6;
#else
const int maxN = 4;
#endif
if (useXbyak && op.isMont && N <= maxN) {
op.fp_invOp = &invOpForMontC;
initInvTbl(op);
}
#elif defined(MCL_STATIC_CODE)
fp::setStaticCode(op);
if (op.isMont && N <= 4) {
bool enableInv = true;
#endif // MCL_USE_XBYAK
if (enableInv && op.isMont && N <= maxInvN) {
op.fp_invOp = &invOpForMontC;
initInvTbl(op);
}
#endif
#endif // MCL_X64_ASM
return true;
}

@ -393,7 +393,7 @@ private:
op.fp_sqrA_ = gen_sqr();
setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());
if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
if (op.primeMode != PM_NIST_P192 && op.N <= 6) { // support general op.N but not fast for op.N > 4
align(16);
op.fp_preInv = getCurr<int2u>();
gen_preInv();
@ -2676,7 +2676,7 @@ private:
*/
void gen_preInv()
{
assert(1 <= pn_ && pn_ <= 4);
assert(1 <= pn_ && pn_ <= 6);
const int freeRegNum = 13;
StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max<int>(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8);
const Reg64& pr = sf.p[0];

@ -25,6 +25,7 @@ void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
void mclx_Fp_sqr(Unit*, const Unit*);
void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
int mclx_Fp_preInv(Unit*, const Unit*);
Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*);
Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
@ -73,6 +74,7 @@ void setStaticCode(mcl::fp::Op& op)
op.fp2_mulA_ = mclx_Fp2_mul;
op.fp2_sqrA_ = mclx_Fp2_sqr;
op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
op.fp_preInv = mclx_Fp_preInv;
} else {
// Fr, sizeof(Fr) = 32
op.fp_addPre = mclx_Fr_addPre;

Loading…
Cancel
Save