diff --git a/sample/large.cpp b/sample/large.cpp index 72de4a2..cd79412 100644 --- a/sample/large.cpp +++ b/sample/large.cpp @@ -108,11 +108,11 @@ void test(const std::string& pStr, mcl::fp::Mode mode) } CYBOZU_BENCH("mulPre", op.fpDbl_mulPre, ux, ux, uy); CYBOZU_BENCH("sqrPre", op.fpDbl_sqrPre, ux, ux); - CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux); - CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux); + CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux, op.p); + CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux, op.p); CYBOZU_BENCH("addNC", op.fpDbl_addNC, ux, ux, ux); CYBOZU_BENCH("subNC", op.fpDbl_subNC, ux, ux, ux); - CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux); + CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux, op.p); CYBOZU_BENCH("mul", Fp::mul, x, x, x); compareGmp(pStr); } diff --git a/sample/rawbench.cpp b/sample/rawbench.cpp index 083d0cf..ddfe733 100644 --- a/sample/rawbench.cpp +++ b/sample/rawbench.cpp @@ -36,19 +36,19 @@ void benchRaw(const char *p, mcl::fp::Mode mode) double fpDbl_addT, fpDbl_subT; double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT; double fp2_sqrT, fp2_mulT; - CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy); - CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux); + CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy, op.p); + CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux, op.p); CYBOZU_BENCH_T(fp_addNCT, op.fp_addNC, uz, ux, uy); CYBOZU_BENCH_T(fp_subNCT, op.fp_subNC, uz, uy, ux); - CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux); - CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy); - CYBOZU_BENCH_T(fp_mul_UnitT, op.fp_mul_Unit, uz, ux, 12345678); + CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux, op.p); + CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy, op.p); + CYBOZU_BENCH_T(fp_mul_UnitT, op.fp_mul_Unit, uz, ux, 12345678, op.p); CYBOZU_BENCH_T(fp_mul_UnitPreT, op.fp_mul_UnitPre, ux, ux, 12345678); - CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy); - CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux); + CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy, op.p); + CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux, op.p); CYBOZU_BENCH_T(fpDbl_sqrPreT, op.fpDbl_sqrPre, uz, ux); CYBOZU_BENCH_T(fpDbl_mulPreT, op.fpDbl_mulPre, uz, ux, uy); - CYBOZU_BENCH_T(fpDbl_modT, op.fpDbl_mod, uz, ux); + CYBOZU_BENCH_T(fpDbl_modT, op.fpDbl_mod, uz, ux, op.p); Fp2 f2x, f2y; f2x.a = fx; f2x.b = fy; diff --git a/src/fp.cpp b/src/fp.cpp index bcaa295..97648d2 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -107,6 +107,70 @@ Mode StrToMode(const std::string& s) throw cybozu::Exception("StrToMode") << s; } +#ifdef MCL_USE_LLVM + +#define MCL_DEF_LLVM_FUNC(bit) \ +template<>const u3u AddNC::f = &mcl_fp_addNC ## bit ## L; \ +template<>const u3u SubNC::f = &mcl_fp_subNC ## bit ## L; \ +template<>const void3u MulPre::f = &mcl_fpDbl_mulPre ## bit ## L; \ +template<>const void2u SqrPre::f = &mcl_fpDbl_sqrPre ## bit ## L; \ +template<>const void2uI Mul_UnitPre::f = &mcl_fp_mul_UnitPre ## bit ## L; \ +template<>const void4u Add::f = &mcl_fp_add ## bit ## L; \ +template<>const void4u Sub::f = &mcl_fp_sub ## bit ## L; \ +template<>const void4u Mont::f = &mcl_fp_mont ## bit ## L; \ +template<>const void3u MontRed::f = &mcl_fp_montRed ## bit ## L; \ +template<>const void4u DblAdd::f = &mcl_fpDbl_add ## bit ## L; \ +template<>const void4u DblSub::f = &mcl_fpDbl_sub ## bit ## L; \ + +template +struct Mul { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + Unit xy[N * 2]; + MulPre::f(xy, x, y); + Dbl_Mod::f(z, xy, p); + } + static const void4u f; +}; + +template +const void4u Mul::f = Mul::func; + +template +struct Sqr { + static inline void func(Unit *y, const Unit *x, const Unit *p) + { + Unit xx[N * 2]; + SqrPre::f(xx, x); + Dbl_Mod::f(y, xx, p); + } + static const void3u f; +}; + +template +const void3u Sqr::f = Sqr::func; + +MCL_DEF_LLVM_FUNC(64) +MCL_DEF_LLVM_FUNC(128) +MCL_DEF_LLVM_FUNC(192) +MCL_DEF_LLVM_FUNC(256) +MCL_DEF_LLVM_FUNC(320) +MCL_DEF_LLVM_FUNC(384) +MCL_DEF_LLVM_FUNC(448) +MCL_DEF_LLVM_FUNC(512) +#if CYBOZU_OS_BIT == 32 +MCL_DEF_LLVM_FUNC(160) +MCL_DEF_LLVM_FUNC(224) +MCL_DEF_LLVM_FUNC(288) +MCL_DEF_LLVM_FUNC(352) +MCL_DEF_LLVM_FUNC(416) +MCL_DEF_LLVM_FUNC(480) +MCL_DEF_LLVM_FUNC(544) +#else +MCL_DEF_LLVM_FUNC(576) +#endif + +#endif template struct OpeFunc { @@ -136,43 +200,6 @@ struct OpeFunc { { copyArray(y, x, N); } - static inline void fp_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - if (AddPre::f(z, x, y)) { - SubPre::f(z, z, p); - return; - } - Unit tmp[N]; - if (SubPre::f(tmp, z, p) == 0) { - memcpy(z, tmp, sizeof(tmp)); - } - } - static inline void fp_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - if (SubPre::f(z, x, y)) { - AddPre::f(z, z, p); - } - } - /* - z[N * 2] <- x[N * 2] + y[N * 2] mod p[N] << (N * UnitBitSize) - */ - static inline void fpDbl_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - if (AddPre::f(z, x, y)) { - SubPre::f(z + N, z + N, p); - return; - } - Unit tmp[N]; - if (SubPre::f(tmp, z + N, p) == 0) { - memcpy(z + N, tmp, sizeof(tmp)); - } - } - static inline void fpDbl_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p) - { - if (SubPre::f(z, x, y)) { - AddPre::f(z + N, z + N, p); - } - } // z[N] <- mont(x[N], y[N]) static inline void fp_mulMontC(Unit *z, const Unit *x, const Unit *y, const Unit *p) { @@ -189,20 +216,20 @@ struct OpeFunc { Unit t[N + 2]; Mul_UnitPre::f(t, p, q); // p * q t[N + 1] = 0; // always zero - c[N + 1] = AddPre::f(c, c, t); + c[N + 1] = AddNC::f(c, c, t); c++; for (size_t i = 1; i < N; i++) { Mul_UnitPre::f(t, x, y[i]); - c[N + 1] = AddPre::f(c, c, t); + c[N + 1] = AddNC::f(c, c, t); q = c[0] * rp; Mul_UnitPre::f(t, p, q); - AddPre::f(c, c, t); + AddNC::f(c, c, t); c++; } if (c[N]) { - SubPre::f(z, c, p); + SubNC::f(z, c, p); } else { - if (SubPre::f(z, c, p)) { + if (SubNC::f(z, c, p)) { memcpy(z, c, N * sizeof(Unit)); } } @@ -221,7 +248,7 @@ struct OpeFunc { Unit *c = buf; Unit q = xy[0] * rp; Mul_UnitPre::f(t, p, q); - buf[N * 2] = AddPre::f(buf, xy, t); + buf[N * 2] = AddNC::f(buf, xy, t); c++; for (size_t i = 1; i < N; i++) { q = c[0] * rp; @@ -231,9 +258,9 @@ struct OpeFunc { c++; } if (c[N]) { - SubPre::f(z, c, p); + SubNC::f(z, c, p); } else { - if (SubPre::f(z, c, p)) { + if (SubNC::f(z, c, p)) { memcpy(z, c, N * sizeof(Unit)); } } @@ -289,39 +316,48 @@ struct OpeFunc { if (x != y) fp_clearC(y); return; } - fp_subC(y, p, x, p); + SubNC::f(y, p, x); } }; #ifdef MCL_USE_LLVM - #define SET_OP_LLVM(bit) \ + #define SET_OP_LLVM /* assume n */ \ if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \ - fp_add = mcl_fp_add ## bit ## L; \ - fp_sub = mcl_fp_sub ## bit ## L; \ - if (!isFullBit) { \ - fp_addNC = mcl_fp_addNC ## bit ## L; \ - fp_subNC = mcl_fp_subNC ## bit ## L; \ - } \ - fpDbl_mulPre = mcl_fpDbl_mulPre ## bit ## L; \ - fp_mul_UnitPre = mcl_fp_mul_UnitPre ## bit ## L; \ - fpDbl_sqrPre = mcl_fpDbl_sqrPre ## bit ## L; \ + fp_add = Add::f; \ + fp_sub = Sub::f; \ + fpDbl_add = DblAdd::f; \ + fpDbl_sub = DblSub::f; \ if (mode == FP_LLVM_MONT) { \ - fpDbl_mod = mcl_fp_montRed ## bit ## L; \ - fp_mul = mcl_fp_mont ## bit ## L; \ + fp_mul = Mont::f; \ + fp_sqr = SqrMont::f; \ + fpDbl_mod = MontRed::f; \ + } else { \ + fp_mul = Mul::f; \ + fp_sqr = Sqr::f; \ + } \ + fpDbl_mulPre = MulPre::f; \ + fpDbl_sqrPre = SqrPre::f; \ + fp_mul_UnitPre = Mul_UnitPre::f; \ + if (!isFullBit) { \ + fp_addNC = AddNC::f; \ + fp_subNC = SubNC::f; \ } \ } - #define SET_OP_DBL_LLVM(bit, n2) \ + +#define SET_OP_LLVM2(bit) \ + { \ + const int n = bit / UnitBitSize; \ if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \ - fpDbl_add = mcl_fpDbl_add ## bit ## L; \ - fpDbl_sub = mcl_fpDbl_sub ## bit ## L; \ if (!isFullBit) { \ - fpDbl_addNC = mcl_fp_addNC ## n2 ## L; \ - fpDbl_subNC = mcl_fp_subNC ## n2 ## L; \ + fpDbl_addNC = AddNC::f; \ + fpDbl_subNC = SubNC::f; \ } \ - } + } \ + } + #else - #define SET_OP_LLVM(bit) - #define SET_OP_DBL_LLVM(bit, n2) + #define SET_OP_LLVM + #define SET_OP_LLVM2(bit) #endif #define SET_OP(bit) \ @@ -332,8 +368,8 @@ struct OpeFunc { fp_clear = OpeFunc::fp_clearC; \ fp_copy = OpeFunc::fp_copyC; \ fp_neg = OpeFunc::fp_negC; \ - fp_add = OpeFunc::fp_addC; \ - fp_sub = OpeFunc::fp_subC; \ + fp_add = Add::f; \ + fp_sub = Sub::f; \ if (isMont) { \ fp_mul = OpeFunc::fp_mulMontC; \ fp_sqr = OpeFunc::fp_sqrMontC; \ @@ -350,15 +386,15 @@ struct OpeFunc { fpDbl_sqrPre = SqrPre::f; \ fp_mul_UnitPre = Mul_UnitPre::f; \ fpN1_mod = N1_Mod::f; \ - fpDbl_add = OpeFunc::fpDbl_addC; \ - fpDbl_sub = OpeFunc::fpDbl_subC; \ + fpDbl_add = DblAdd::f; \ + fpDbl_sub = DblSub::f; \ if (!isFullBit) { \ - fp_addNC = AddPre::f; \ - fp_subNC = SubPre::f; \ - fpDbl_addNC = AddPre::f; \ - fpDbl_subNC = SubPre::f; \ + fp_addNC = AddNC::f; \ + fp_subNC = SubNC::f; \ + fpDbl_addNC = AddNC::f; \ + fpDbl_subNC = SubNC::f; \ } \ - SET_OP_LLVM(bit) \ + SET_OP_LLVM \ } #ifdef MCL_USE_XBYAK @@ -476,41 +512,26 @@ void Op::init(const std::string& mstr, size_t maxBitSize, Mode mode) } #endif switch (roundBit) { - case 64: SET_OP(64); SET_OP_DBL_LLVM(64, 128); break; - case 128: SET_OP(128); SET_OP_DBL_LLVM(128, 256); break; - case 192: SET_OP(192); SET_OP_DBL_LLVM(192, 384); break; - case 256: SET_OP(256); SET_OP_DBL_LLVM(256, 512); break; + case 64: SET_OP(64); SET_OP_LLVM2(64); break; + case 128: SET_OP(128); SET_OP_LLVM2(128); break; + case 192: SET_OP(192); SET_OP_LLVM2(192); break; + case 256: SET_OP(256); SET_OP_LLVM2(256); break; case 320: SET_OP(320); break; case 384: SET_OP(384); break; case 448: SET_OP(448); break; - case 512: SET_OP(512); - // QQQ : need refactor for large prime -#if MCL_MAX_OP_BIT_SIZE == 768 - SET_OP_DBL_LLVM(512, 1024); -#endif - break; + case 512: SET_OP(512); break; #if CYBOZU_OS_BIT == 64 - case 576: SET_OP(576); -#if MCL_MAX_OP_BIT_SIZE == 768 - SET_OP_DBL_LLVM(576, 1152); -#endif - break; + case 576: SET_OP(576); break; #if MCL_MAX_OP_BIT_SIZE == 768 - case 640: SET_OP(640); - SET_OP_DBL_LLVM(640, 1280); - break; - case 704: SET_OP(704); - SET_OP_DBL_LLVM(704, 1408); - break; - case 768: SET_OP(768); - SET_OP_DBL_LLVM(768, 1536); - break; + case 640: SET_OP(640); break; + case 704: SET_OP(704); break; + case 768: SET_OP(768); break; #endif #else - case 32: SET_OP(32); SET_OP_DBL_LLVM(32, 64); break; - case 96: SET_OP(96); SET_OP_DBL_LLVM(96, 192); break; - case 160: SET_OP(160); SET_OP_DBL_LLVM(160, 320); break; - case 224: SET_OP(224); SET_OP_DBL_LLVM(224, 448); break; + case 32: SET_OP(32); SET_OP_LLVM2(32); break; + case 96: SET_OP(96); SET_OP_LLVM2(96); break; + case 160: SET_OP(160); SET_OP_LLVM2(160); break; + case 224: SET_OP(224); SET_OP_LLVM2(224); break; case 288: SET_OP(288); break; case 352: SET_OP(352); break; case 416: SET_OP(416); break; diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp index 99763e6..a30730b 100644 --- a/src/fp_proto.hpp +++ b/src/fp_proto.hpp @@ -10,32 +10,124 @@ namespace mcl { namespace fp { +struct Ltag; +struct Atag; + // (carry, z[N]) <- x[N] + y[N] -templateclass AddPre { static const u3u f; }; +templatestruct AddNC { static const u3u f; }; // (carry, z[N]) <- x[N] - y[N] -templateclass SubPre { static const u3u f; }; +templatestruct SubNC { static const u3u f; }; // z[N * 2] <- x[N] * y[N] -templateclass MulPre { static const void3u f; }; +templatestruct MulPre { static const void3u f; }; // z[N * 2] <- x[N] * x[N] -templateclass SqrPre { static const void2u f; }; +templatestruct SqrPre { static const void2u f; }; // z[N + 1] <- x[N] * y -templateclass Mul_UnitPre { static const void2uI f; }; +templatestruct Mul_UnitPre { static const void2uI f; }; // z[N] <- x[N + 1] % p[N] -templateclass N1_Mod { static const void3u f; }; +templatestruct N1_Mod { static const void3u f; }; // z[N] <- x[N * 2] % p[N] -templateclass Dbl_Mod { static const void3u f; }; +templatestruct Dbl_Mod { static const void3u f; }; +// z[N] <- Montgomery(x[N], y[N], p[N]) +templatestruct Mont { static const void4u f; }; +// z[N] <- MontRed(xy[N], p[N]) +templatestruct MontRed { static const void3u f; }; + +// z[N] <- (x[N] * y[N]) % p[N] +templatestruct Mul { static const void4u f; }; +// z[N] <- (x[N] ^ 2) % p[N] +templatestruct Sqr { static const void3u f; }; + +// z[N] <- Montgomery(x[N], x[N], p[N]) +template +struct SqrMont { + static inline void func(Unit *y, const Unit *x, const Unit *p) + { + Mont::f(y, x, x, p); + } + static const void3u f; +}; +template +const void3u SqrMont::f = SqrMont::func; + +// z[N] <- (x[N] + y[N]) % p[N] +template +struct Add { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + if (AddNC::f(z, x, y)) { + SubNC::f(z, z, p); + return; + } + Unit tmp[N]; + if (SubNC::f(tmp, z, p) == 0) { + memcpy(z, tmp, sizeof(tmp)); + } + } + static const void4u f; +}; + +template +const void4u Add::f = Add::func; + +// z[N] <- (x[N] - y[N]) % p[N] +template +struct Sub { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + if (SubNC::f(z, x, y)) { + AddNC::f(z, z, p); + } + } + static const void4u f; +}; + +template +const void4u Sub::f = Sub::func; + +// z[N * 2] <- (x[N * 2] + y[N * 2]) mod p[N] << (N * UnitBitSize) +template +struct DblAdd { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + if (AddNC::f(z, x, y)) { + SubNC::f(z + N, z + N, p); + return; + } + Unit tmp[N]; + if (SubNC::f(tmp, z + N, p) == 0) { + memcpy(z + N, tmp, sizeof(tmp)); + } + } + static const void4u f; +}; + +template +const void4u DblAdd::f = DblAdd::func; + +// z[N * 2] <- (x[N * 2] - y[N * 2]) mod p[N] << (N * UnitBitSize) +template +struct DblSub { + static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p) + { + if (SubNC::f(z, x, y)) { + AddNC::f(z + N, z + N, p); + } + } + static const void4u f; +}; + +template +const void4u DblSub::f = DblSub::func; } } // mcl::fp #ifdef MCL_USE_LLVM -extern "C" { - #define MCL_FP_DEF_FUNC_SUB(len, suf) \ void mcl_fp_add ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ void mcl_fp_sub ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ -void mcl_fp_addNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ -void mcl_fp_subNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ +mcl::fp::Unit mcl_fp_addNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ +mcl::fp::Unit mcl_fp_subNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fp_mul_UnitPre ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl::fp::Unit y); \ void mcl_fpDbl_mulPre ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fpDbl_sqrPre ## len ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \ @@ -45,15 +137,16 @@ void mcl_fpDbl_add ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const void mcl_fpDbl_sub ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); #define MCL_FP_DEF_FUNC(len) \ - MCL_FP_DEF_FUNC_SUB(len, G) \ MCL_FP_DEF_FUNC_SUB(len, L) \ MCL_FP_DEF_FUNC_SUB(len, A) #define MCL_FP_DEF_FUNC_SPECIAL(suf) \ - void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \ - void mcl_fp_mul_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* /* dummy */); \ - void mcl_fp_sqr_NIST_P192 ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x, const mcl::fp::Unit* /* dummy */); \ - void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); +void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \ +void mcl_fp_mul_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* /* dummy */); \ +void mcl_fp_sqr_NIST_P192 ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x, const mcl::fp::Unit* /* dummy */); \ +void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); + +extern "C" { MCL_FP_DEF_FUNC(64) MCL_FP_DEF_FUNC(128) @@ -83,14 +176,13 @@ MCL_FP_DEF_FUNC(1408) MCL_FP_DEF_FUNC(1536) #endif -MCL_FP_DEF_FUNC_SPECIAL(G) MCL_FP_DEF_FUNC_SPECIAL(L) MCL_FP_DEF_FUNC_SPECIAL(A) +} + #undef MCL_FP_DEF_FUNC_SUB #undef MCL_FP_DEF_FUNC -} - #endif diff --git a/src/gen.cpp b/src/gen.cpp index b7d9f9f..155a5b6 100644 --- a/src/gen.cpp +++ b/src/gen.cpp @@ -296,57 +296,65 @@ struct Code : public mcl::Generator { void gen_mcl_fp_addsubNC(bool isAdd) { resetGlobalIdx(); + Operand r(Int, unit); Operand pz(IntPtr, bit); Operand px(IntPtr, bit); Operand py(IntPtr, bit); std::string name; if (isAdd) { name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L"; - mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py); + mcl_fp_addNCM[bit] = Function(name, r, pz, px, py); verifyAndSetPrivate(mcl_fp_addNCM[bit]); beginFunc(mcl_fp_addNCM[bit]); } else { name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L"; - mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py); + mcl_fp_subNCM[bit] = Function(name, r, pz, px, py); verifyAndSetPrivate(mcl_fp_subNCM[bit]); beginFunc(mcl_fp_subNCM[bit]); } - Operand x = load(px); - Operand y = load(py); + Operand x = zext(load(px), bit + unit); + Operand y = zext(load(py), bit + unit); Operand z; if (isAdd) { z = add(x, y); + store(trunc(z, bit), pz); + r = trunc(lshr(z, bit), unit); } else { z = sub(x, y); + store(trunc(z, bit), pz); + r = _and(trunc(lshr(z, bit), unit), makeImm(unit, 1)); } - store(z, pz); - ret(Void); + ret(r); endFunc(); } -#if 0 - void gen_mcl_fp_addS() +#if 0 // void-return version + void gen_mcl_fp_addsubNC(bool isAdd) { resetGlobalIdx(); Operand pz(IntPtr, bit); Operand px(IntPtr, bit); Operand py(IntPtr, bit); - Operand pp(IntPtr, bit); - std::string name = "mcl_fp_add" + cybozu::itoa(bit) + "S"; - mcl_fp_addM[bit] = Function(name, Void, pz, px, py, pp); - beginFunc(mcl_fp_addM[bit]); + std::string name; + if (isAdd) { + name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L"; + mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py); + verifyAndSetPrivate(mcl_fp_addNCM[bit]); + beginFunc(mcl_fp_addNCM[bit]); + } else { + name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L"; + mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py); + verifyAndSetPrivate(mcl_fp_subNCM[bit]); + beginFunc(mcl_fp_subNCM[bit]); + } Operand x = load(px); Operand y = load(py); - Operand p = load(pp); - x = zext(x, bit + unit); - y = zext(y, bit + unit); - p = zext(p, bit + unit); - Operand t0 = add(x, y); - Operand t1 = sub(t0, p); - Operand t = lshr(t1, bit); - t = trunc(t, 1); - t = select(t, t0, t1); - t = trunc(t, bit); - store(t, pz); + Operand z; + if (isAdd) { + z = add(x, y); + } else { + z = sub(x, y); + } + store(z, pz); ret(Void); endFunc(); } @@ -385,33 +393,6 @@ struct Code : public mcl::Generator { ret(Void); endFunc(); } -#if 0 - void gen_mcl_fp_subS() - { - resetGlobalIdx(); - Operand pz(IntPtr, bit); - Operand px(IntPtr, bit); - Operand py(IntPtr, bit); - Operand pp(IntPtr, bit); - std::string name = "mcl_fp_sub" + cybozu::itoa(bit) + "S"; - mcl_fp_subM[bit] = Function(name, Void, pz, px, py, pp); - beginFunc(mcl_fp_subM[bit]); - Operand x = load(px); - Operand y = load(py); - x = zext(x, bit + unit); - y = zext(y, bit + unit); - Operand vc = sub(x, y); - Operand v = trunc(vc, bit); // v = x - y - Operand c = lshr(vc, bit); - c = trunc(c, 1); - Operand p = load(pp); - Operand z = select(c, p, makeImm(bit, 0)); - v = add(v, z); - store(v, pz); - ret(Void); - endFunc(); - } -#endif void gen_mcl_fp_sub() { resetGlobalIdx(); diff --git a/src/low_gmp.hpp b/src/low_gmp.hpp index 44477d5..d11a30f 100644 --- a/src/low_gmp.hpp +++ b/src/low_gmp.hpp @@ -7,7 +7,7 @@ namespace mcl { namespace fp { struct Gtag; template -struct AddPre { +struct AddNC { static inline Unit func(Unit *z, const Unit *x, const Unit *y) { return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); @@ -16,10 +16,10 @@ struct AddPre { }; template -const u3u AddPre::f = &AddPre::func; +const u3u AddNC::f = &AddNC::func; template -struct SubPre { +struct SubNC { static inline Unit func(Unit *z, const Unit *x, const Unit *y) { return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); @@ -28,7 +28,7 @@ struct SubPre { }; template -const u3u SubPre::f = &SubPre::func; +const u3u SubNC::f = &SubNC::func; template struct MulPre {