Fp::inv uses generated code

4 years ago · 512a0abd44
parent 02cce0ed47
commit 512a0abd44
4 changed files with 20 additions and 16 deletions
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -14,7 +14,7 @@
 	#define MCL_DONT_USE_XBYAK
 	#define MCL_DONT_USE_OPENSSL
 #endif
-#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8)
+#if !defined(MCL_DONT_USE_XBYAK) && (defined(_WIN64) || defined(__x86_64__)) && (MCL_SIZEOF_UNIT == 8) && !defined(MCL_STATIC_CODE)
 	#define MCL_USE_XBYAK
 #endif
 #if defined(MCL_USE_XBYAK) || defined(MCL_STATIC_CODE)
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -377,29 +377,31 @@ static bool initForMont(Op& op, const Unit *p, Mode mode)
 	}
 	op.rp = getMontgomeryCoeff(p[0]);
 	if (mode != FP_XBYAK) return true;
+
+#ifdef MCL_USE_VINT
+	const int maxInvN = 6;
+#else
+	const int maxInvN = 4;
+#endif
+
+#ifdef MCL_X64_ASM
+
 #ifdef MCL_USE_XBYAK
 	if (op.fg == 0) op.fg = Op::createFpGenerator();
-	bool useXbyak = op.fg->init(op, g_cpu);
+	bool enableInv = op.fg->init(op, g_cpu);
 #ifdef MCL_DUMP_JIT
 	return true;
 #endif
-#ifdef MCL_USE_VINT
-	const int maxN = 6;
-#else
-	const int maxN = 4;
-#endif
-
-	if (useXbyak && op.isMont && N <= maxN) {
-		op.fp_invOp = &invOpForMontC;
-		initInvTbl(op);
-	}
 #elif defined(MCL_STATIC_CODE)
 	fp::setStaticCode(op);
-	if (op.isMont && N <= 4) {
+	bool enableInv = true;
+#endif // MCL_USE_XBYAK
+
+	if (enableInv && op.isMont && N <= maxInvN) {
 		op.fp_invOp = &invOpForMontC;
 		initInvTbl(op);
 	}
-#endif
+#endif // MCL_X64_ASM
 	return true;
 }

--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -393,7 +393,7 @@ private:
 		op.fp_sqrA_ = gen_sqr();
 		setFuncInfo(prof_, suf, "_sqr", op.fp_sqrA_, getCurr());

-		if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
+		if (op.primeMode != PM_NIST_P192 && op.N <= 6) { // support general op.N but not fast for op.N > 4
 			align(16);
 			op.fp_preInv = getCurr<int2u>();
 			gen_preInv();
@@ -2676,7 +2676,7 @@ private:
 	*/
 	void gen_preInv()
 	{
-		assert(1 <= pn_ && pn_ <= 4);
+		assert(1 <= pn_ && pn_ <= 6);
 		const int freeRegNum = 13;
 		StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max<int>(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8);
 		const Reg64& pr = sf.p[0];
--- a/src/fp_static_code.hpp
+++ b/src/fp_static_code.hpp
@@ -25,6 +25,7 @@ void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
 void mclx_Fp_sqr(Unit*, const Unit*);
 void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
+int mclx_Fp_preInv(Unit*, const Unit*);
 Unit mclx_FpDbl_addPre(Unit*, const Unit*, const Unit*);
 Unit mclx_FpDbl_subPre(Unit*, const Unit*, const Unit*);
 void mclx_FpDbl_mulPre(Unit*, const Unit*, const Unit*);
@@ -73,6 +74,7 @@ void setStaticCode(mcl::fp::Op& op)
 		op.fp2_mulA_ = mclx_Fp2_mul;
 		op.fp2_sqrA_ = mclx_Fp2_sqr;
 		op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
+		op.fp_preInv = mclx_Fp_preInv;
 	} else {
 		// Fr, sizeof(Fr) = 32
 		op.fp_addPre = mclx_Fr_addPre;