diff --git a/include/mcl/fp_generator.hpp b/include/mcl/fp_generator.hpp index 5006bb8..253e3c7 100644 --- a/include/mcl/fp_generator.hpp +++ b/include/mcl/fp_generator.hpp @@ -253,12 +253,12 @@ struct FpGenerator : Xbyak::CodeGenerator { op.fpDbl_mod = getCurr(); gen_fpDbl_mod(); } - if (op.N == 2 || op.N == 3 || op.N == 4) { + if ((useMulx_ && op.N == 2) || op.N == 3 || op.N == 4) { align(16); op.fpDbl_mulPre = getCurr(); gen_fpDbl_mulPre(); } - if (op.N == 2 || op.N == 3 || op.N == 4) { + if ((useMulx_ && op.N == 2) || op.N == 3 || op.N == 4) { align(16); op.fpDbl_sqrPre = getCurr(); gen_fpDbl_sqrPre(op); diff --git a/include/mcl/fp_proto.hpp b/include/mcl/fp_proto.hpp index 865f09b..73755b1 100644 --- a/include/mcl/fp_proto.hpp +++ b/include/mcl/fp_proto.hpp @@ -55,6 +55,7 @@ MCL_FP_DEF_FUNC(576) #undef MCL_FP_DEF_FUNC +void mcl_fpDbl_mod_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*); void mcl_fp_mul_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); } diff --git a/src/fp.cpp b/src/fp.cpp index 3a49e9e..d3e60ca 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -413,6 +413,7 @@ void Op::init(const std::string& mstr, int base, size_t maxBitSize, Mode mode) #ifdef MCL_USE_LLVM if ((mode == FP_AUTO || mode == FP_LLVM) && mp == mpz_class("0xfffffffffffffffffffffffffffffffeffffffffffffffff")) { fp_mul = &mcl_fp_mul_NIST_P192; +// fpDbl_mod = &mcl_fpDbl_mod_NIST_P192; isMont = false; } #endif diff --git a/src/once.txt b/src/once.txt index 05c242d..df6dc4f 100644 --- a/src/once.txt +++ b/src/once.txt @@ -30,7 +30,7 @@ define internal i64 @extract192to64(i192 %x, i192 %shift) { ret i64 %t1 } -define internal void @fpDbl_modNIST_P192(i192* %out, i192* %px) { +define void @mcl_fpDbl_mod_NIST_P192(i192* %out, i192* %px) { %L192 = load i192* %px %L = zext i192 %L192 to i256 @@ -69,6 +69,6 @@ define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) { %buf = alloca i192, i32 2 %p = bitcast i192* %buf to i$(unit)* call void @mcl_fpDbl_mulPre192(i$(unit)* %p, i192* %px, i192* %py) - call void @fpDbl_modNIST_P192(i192* %pz, i192* %buf) + call void @mcl_fpDbl_mod_NIST_P192(i192* %pz, i192* %buf) ret void }