From 2f643448bcbabdc230c86bdcd0c17f72f8265486 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 31 Jul 2017 16:03:57 +0900 Subject: [PATCH] add portable mul for 32-bit --- include/mcl/vint.hpp | 50 +++++++++++++++++++++++++++++++++++--------- src/fp.cpp | 19 +++++++++-------- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp index e8e5fde..75b5de3 100644 --- a/include/mcl/vint.hpp +++ b/include/mcl/vint.hpp @@ -44,26 +44,56 @@ inline void split64(uint32_t *H, uint32_t *L, uint64_t x) } /* - [H:L] <= a * b + [H:L] <= x * y @return L */ -static inline Unit mulUnit(Unit *H, Unit a, Unit b) +static inline Unit mulUnit(Unit *pH, Unit x, Unit y) { #if MCL_SIZEOF_UNIT == 4 - uint64_t t = uint64_t(a) * b; + uint64_t t = uint64_t(x) * y; uint32_t L; - split64(H, &L, t); + split64(pH, &L, t); return L; -#else -#if defined(_WIN64) && !defined(__INTEL_COMPILER) - return _umul128(a, b, H); +#elif MCL_VINT_MUL_PORTABLE + uint32_t a = uint32_t(x >> 32); + uint32_t b = uint32_t(x); + uint32_t c = uint32_t(y >> 32); + uint32_t d = uint32_t(y); + + uint64_t ad = uint64_t(d) * a; + uint64_t bd = uint64_t(d) * b; + uint64_t L = uint32_t(bd); + ad += bd >> 32; // [ad:L] + + uint64_t ac = uint64_t(c) * a; + uint64_t bc = uint64_t(c) * b; + uint64_t H = uint32_t(bc); + ac += bc >> 32; // [ac:H] + /* + adL + acH + */ + uint64_t t = (ac << 32) | H; + ac >>= 32; + H = t + ad; + if (H < t) { + ac++; + } + /* + ac:H:L + */ + L |= H << 32; + H = (ac << 32) | uint32_t(H >> 32); + *pH = H; + return L; +#elif defined(_WIN64) && !defined(__INTEL_COMPILER) + return _umul128(x, y, pH); #else typedef __attribute__((mode(TI))) unsigned int uint128; - uint128 t = uint128(a) * b; - *H = uint64_t(t >> 64); + uint128 t = uint128(x) * y; + *pH = uint64_t(t >> 64); return uint64_t(t); #endif -#endif } /* diff --git a/src/fp.cpp b/src/fp.cpp index 88979a2..9824644 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -438,22 +438,23 @@ void Op::init(const std::string& mstr, size_t maxBitSize, Mode mode, size_t mclM Xbyak > llvm_mont > llvm > gmp_mont > gmp */ #ifdef MCL_USE_XBYAK - if (mode == fp::FP_AUTO) mode = fp::FP_XBYAK; - if (mode == fp::FP_XBYAK && bitSize > 256) { - mode = fp::FP_AUTO; + if (mode == FP_AUTO) mode = FP_XBYAK; + if (mode == FP_XBYAK && bitSize > 256) { + mode = FP_AUTO; } - if (!fp::isEnableJIT()) { - mode = fp::FP_AUTO; + if (!isEnableJIT()) { + mode = FP_AUTO; } #else - if (mode == fp::FP_XBYAK) mode = fp::FP_AUTO; + if (mode == FP_XBYAK) mode = FP_AUTO; #endif #ifdef MCL_USE_LLVM - if (mode == fp::FP_AUTO) mode = fp::FP_LLVM_MONT; + if (mode == FP_AUTO) mode = FP_LLVM_MONT; #else - if (mode == fp::FP_LLVM || mode == fp::FP_LLVM_MONT) mode = fp::FP_AUTO; + if (mode == FP_LLVM || mode == FP_LLVM_MONT) mode = FP_AUTO; #endif - isMont = mode == fp::FP_GMP_MONT || mode == fp::FP_LLVM_MONT || mode == fp::FP_XBYAK; + if (mode == FP_AUTO) mode = FP_GMP_MONT; + isMont = mode == FP_GMP_MONT || mode == FP_LLVM_MONT || mode == FP_XBYAK; #ifndef NDEBUG fprintf(stderr, "mode=%s, isMont=%d, maxBitSize=%d" #ifdef MCL_USE_XBYAK