diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index 7f57537..596a35f 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -28,37 +28,21 @@ #include #include -#ifndef MCL_FP_BLOCK_MAX_BIT_N - #define MCL_FP_BLOCK_MAX_BIT_N 521 -#endif - namespace mcl { namespace fp { -void setOp(mcl::fp::Op& op, const Unit* p, size_t bitLen); -void initOpByLLVM(Op& op, const fp::Unit* p, size_t bitLen); - - -struct Block { - typedef fp::Unit Unit; - const Unit *p; // pointer to original FpT.v_ - size_t n; - static const size_t UnitByteN = sizeof(Unit); - static const size_t maxUnitN = (MCL_FP_BLOCK_MAX_BIT_N + UnitByteN * 8 - 1) / (UnitByteN * 8); - Unit v_[maxUnitN]; -}; +struct TagDefault; } // mcl::fp template class FpT { typedef fp::Unit Unit; - static const size_t UnitByteN = sizeof(Unit); - static const size_t maxUnitN = (maxBitN + UnitByteN * 8 - 1) / (UnitByteN * 8); + static const size_t maxN = (maxBitN + fp::UnitBitN - 1) / fp::UnitBitN; static fp::Op op_; template friend class FpT; - Unit v_[maxUnitN]; + Unit v_[maxN]; public: // return pointer to array v_[] const Unit *getUnit() const { return v_; } @@ -79,7 +63,7 @@ public: if (isMinus) throw cybozu::Exception("mcl:FpT:setModulo:mstr is not minus") << mstr; const size_t bitLen = Gmp::getBitLen(op_.mp); if (bitLen > maxBitN) throw cybozu::Exception("mcl:FpT:setModulo:too large bitLen") << bitLen << maxBitN; - const size_t n = Gmp::getRaw(op_.p, maxUnitN, op_.mp); + const size_t n = Gmp::getRaw(op_.p, maxN, op_.mp); if (n == 0) throw cybozu::Exception("mcl:FpT:setModulo:bad mstr") << mstr; // default op_.neg = negW; @@ -88,7 +72,7 @@ public: op_.mul = mulW; const Unit *p = op_.p; op_.bitLen = bitLen; - fp::setOp(op_, p, bitLen); + op_.init(p, bitLen); op_.sq.set(op_.mp); } static inline void getModulo(std::string& pstr) @@ -195,7 +179,7 @@ public: } void getBlock(fp::Block& b) const { - assert(maxUnitN <= fp::Block::maxUnitN); + assert(maxN <= fp::maxUnitN); b.n = op_.N; if (op_.useMont) { op_.fromMont(b.v_, v_); @@ -393,7 +377,7 @@ public: } static inline void mulW(Unit *z, const Unit *x, const Unit *y) { - Unit xy[maxUnitN * 2]; + Unit xy[maxN * 2]; op_.mulPreP(xy, x, y); op_.modP(z, xy, op_.p); } diff --git a/include/mcl/fp_base.hpp b/include/mcl/fp_base.hpp index 68413b9..a2fd66f 100644 --- a/include/mcl/fp_base.hpp +++ b/include/mcl/fp_base.hpp @@ -23,13 +23,19 @@ #pragma warning(pop) #endif #include -#ifdef USE_MONT_FP -#include -#else + +#ifndef MCL_FP_BLOCK_MAX_BIT_N + #define MCL_FP_BLOCK_MAX_BIT_N 521 +#endif + namespace mcl { + struct FpGenerator; -} -#endif + +namespace montgomery { + +} } // mcl::montgomery + namespace mcl { namespace fp { @@ -38,6 +44,8 @@ typedef uint32_t Unit; #else typedef uint64_t Unit; #endif +const size_t UnitBitN = sizeof(Unit) * 8; +const size_t maxUnitN = (MCL_FP_BLOCK_MAX_BIT_N + UnitBitN - 1) / UnitBitN; struct Op; @@ -48,107 +56,7 @@ typedef void (*void3u)(Unit*, const Unit*, const Unit*); typedef void (*void4u)(Unit*, const Unit*, const Unit*, const Unit*); typedef int (*int2u)(Unit*, const Unit*); -} } // mcl::fp - -#ifdef MCL_USE_LLVM - -extern "C" { - -#define MCL_FP_DEF_FUNC(len) \ -void mcl_fp_add ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ -void mcl_fp_add ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ -void mcl_fp_sub ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ -void mcl_fp_sub ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ -void mcl_fp_mulPre ## len(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ -void mcl_fp_mont ## len(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, mcl::fp::Unit); - -MCL_FP_DEF_FUNC(128) -MCL_FP_DEF_FUNC(192) -MCL_FP_DEF_FUNC(256) -MCL_FP_DEF_FUNC(320) -MCL_FP_DEF_FUNC(384) -MCL_FP_DEF_FUNC(448) -MCL_FP_DEF_FUNC(512) -#if CYBOZU_OS_BIT == 32 -MCL_FP_DEF_FUNC(160) -MCL_FP_DEF_FUNC(224) -MCL_FP_DEF_FUNC(288) -MCL_FP_DEF_FUNC(352) -MCL_FP_DEF_FUNC(416) -MCL_FP_DEF_FUNC(480) -MCL_FP_DEF_FUNC(544) -#else -MCL_FP_DEF_FUNC(576) -#endif - -void mcl_fp_mul_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); - -} - -#endif - -namespace mcl { namespace fp { - -namespace local { - -inline int compareArray(const Unit* x, const Unit* y, size_t n) -{ - for (size_t i = n - 1; i != size_t(-1); i--) { - if (x[i] < y[i]) return -1; - if (x[i] > y[i]) return 1; - } - return 0; -} - -inline bool isEqualArray(const Unit* x, const Unit* y, size_t n) -{ - for (size_t i = 0; i < n; i++) { - if (x[i] != y[i]) return false; - } - return true; -} - -inline bool isZeroArray(const Unit *x, size_t n) -{ - for (size_t i = 0; i < n; i++) { - if (x[i]) return false; - } - return true; -} - -inline void clearArray(Unit *x, size_t begin, size_t end) -{ - for (size_t i = begin; i < end; i++) x[i] = 0; -} - -inline void copyArray(Unit *y, const Unit *x, size_t n) -{ - for (size_t i = 0; i < n; i++) y[i] = x[i]; -} - -inline void toArray(Unit *y, size_t yn, const mpz_srcptr x) -{ - const int xn = x->_mp_size; - assert(xn >= 0); - const Unit* xp = (const Unit*)x->_mp_d; - assert(xn <= (int)yn); - copyArray(y, xp, xn); - clearArray(y, xn, yn); -} - -} // mcl::fp::local -struct TagDefault; - -#ifndef MCL_FP_BLOCK_MAX_BIT_N - #define MCL_FP_BLOCK_MAX_BIT_N 521 -#endif - -FpGenerator *createFpGenerator(); -void destroyFpGenerator(FpGenerator*); - struct Op { - static const size_t UnitByteN = sizeof(Unit); - static const size_t maxUnitN = (MCL_FP_BLOCK_MAX_BIT_N + UnitByteN * 8 - 1) / (UnitByteN * 8); mpz_class mp; mcl::SquareRoot sq; Unit p[maxUnitN]; @@ -204,7 +112,87 @@ struct Op { { mul(y, x, RR); } + void init(const Unit *p, size_t bitLen); + static FpGenerator* createFpGenerator(); + static void destroyFpGenerator(FpGenerator *fg); }; +/* + get pp such that p * pp = -1 mod M, + where p is prime and M = 1 << 64(or 32). + @param pLow [in] p mod M + T is uint32_t or uint64_t +*/ +inline Unit getMontgomeryCoeff(Unit pLow) +{ + Unit ret = 0; + Unit t = 0; + Unit x = 1; + + for (size_t i = 0; i < UnitBitN; i++) { + if ((t & 1) == 0) { + t += pLow; + ret += x; + } + t >>= 1; + x <<= 1; + } + return ret; +} + +namespace local { + +inline int compareArray(const Unit* x, const Unit* y, size_t n) +{ + for (size_t i = n - 1; i != size_t(-1); i--) { + if (x[i] < y[i]) return -1; + if (x[i] > y[i]) return 1; + } + return 0; +} + +inline bool isEqualArray(const Unit* x, const Unit* y, size_t n) +{ + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} + +inline bool isZeroArray(const Unit *x, size_t n) +{ + for (size_t i = 0; i < n; i++) { + if (x[i]) return false; + } + return true; +} + +inline void clearArray(Unit *x, size_t begin, size_t end) +{ + for (size_t i = begin; i < end; i++) x[i] = 0; +} + +inline void copyArray(Unit *y, const Unit *x, size_t n) +{ + for (size_t i = 0; i < n; i++) y[i] = x[i]; +} + +inline void toArray(Unit *y, size_t yn, const mpz_srcptr x) +{ + const int xn = x->_mp_size; + assert(xn >= 0); + const Unit* xp = (const Unit*)x->_mp_d; + assert(xn <= (int)yn); + copyArray(y, xp, xn); + clearArray(y, xn, yn); +} + +} // mcl::fp::local + +struct Block { + const Unit *p; // pointer to original FpT.v_ + size_t n; + Unit v_[maxUnitN]; +}; } } // mcl::fp diff --git a/include/mcl/fp_generator.hpp b/include/mcl/fp_generator.hpp index 9820ca9..02bdbb9 100644 --- a/include/mcl/fp_generator.hpp +++ b/include/mcl/fp_generator.hpp @@ -9,36 +9,7 @@ #include #include #include - -namespace mcl { - -namespace montgomery { - -/* - get pp such that p * pp = -1 mod M, - where p is prime and M = 1 << 64(or 32). - @param pLow [in] p mod M - T is uint32_t or uint64_t -*/ -template -T getCoff(T pLow) -{ - T ret = 0; - T t = 0; - T x = 1; - - for (size_t i = 0; i < sizeof(T) * 8; i++) { - if ((t & 1) == 0) { - t += pLow; - ret += x; - } - t >>= 1; - x <<= 1; - } - return ret; -} - -} } // mcl::montgomery +#include #if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64) @@ -217,7 +188,7 @@ struct FpGenerator : Xbyak::CodeGenerator { { if (pn < 2) throw cybozu::Exception("mcl:FpGenerator:small pn") << pn; p_ = p; - pp_ = montgomery::getCoff(p[0]); + pp_ = fp::getMontgomeryCoeff(p[0]); pn_ = pn; isFullBit_ = (p_[pn_ - 1] >> 63) != 0; // printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_); diff --git a/include/mcl/fp_proto.hpp b/include/mcl/fp_proto.hpp new file mode 100644 index 0000000..b9e6f8b --- /dev/null +++ b/include/mcl/fp_proto.hpp @@ -0,0 +1,47 @@ +#pragma once +/** + @file + @brief prototype of asm function + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include + +#ifdef MCL_USE_LLVM + +extern "C" { + +#define MCL_FP_DEF_FUNC(len) \ +void mcl_fp_add ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_add ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_sub ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_sub ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_mulPre ## len(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_mont ## len(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, mcl::fp::Unit); + +MCL_FP_DEF_FUNC(128) +MCL_FP_DEF_FUNC(192) +MCL_FP_DEF_FUNC(256) +MCL_FP_DEF_FUNC(320) +MCL_FP_DEF_FUNC(384) +MCL_FP_DEF_FUNC(448) +MCL_FP_DEF_FUNC(512) +#if CYBOZU_OS_BIT == 32 +MCL_FP_DEF_FUNC(160) +MCL_FP_DEF_FUNC(224) +MCL_FP_DEF_FUNC(288) +MCL_FP_DEF_FUNC(352) +MCL_FP_DEF_FUNC(416) +MCL_FP_DEF_FUNC(480) +MCL_FP_DEF_FUNC(544) +#else +MCL_FP_DEF_FUNC(576) +#endif + +void mcl_fp_mul_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); + +} + +#endif + diff --git a/src/fp.cpp b/src/fp.cpp index bd1692b..0274cf0 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -1,24 +1,26 @@ #include +#ifdef USE_MONT_FP +#include +#endif +#include namespace mcl { namespace fp { #ifdef USE_MONT_FP -FpGenerator *createFpGenerator() +FpGenerator *Op::createFpGenerator() { return new FpGenerator(); } - -void destroyFpGenerator(FpGenerator* fg) +void Op::destroyFpGenerator(FpGenerator *fg) { delete fg; } #else -FpGenerator *createFpGenerator() +FpGenerator *Op::createFpGenerator() { return 0; } - -void destroyFpGenerator(FpGenerator*) +void Op::destroyFpGenerator(FpGenerator *) { } #endif @@ -124,29 +126,27 @@ struct OpeFunc { #ifdef MCL_USE_LLVM #define SET_OP_LLVM(n) \ - op.addP = mcl_fp_add ## n ##S; \ - op.subP = mcl_fp_sub ## n ##S; \ - op.mulPreP = mcl_fp_mulPre ## n; + addP = mcl_fp_add ## n ##S; \ + subP = mcl_fp_sub ## n ##S; \ + mulPreP = mcl_fp_mulPre ## n; #else #define SET_OP_LLVM(n) #endif #define SET_OP(n) \ - op.N = n / UnitBitN; \ - op.isZero = OpeFunc::isZeroC; \ - op.clear = OpeFunc::clearC; \ - op.copy = OpeFunc::copyC; \ - op.negP = OpeFunc::negC; \ - op.invOp = OpeFunc::invOp; \ - op.addP = OpeFunc::addC; \ - op.subP = OpeFunc::subC; \ - op.mulPreP = OpeFunc::mulPreC; \ - op.modP = OpeFunc::modC; \ + N = n / UnitBitN; \ + isZero = OpeFunc::isZeroC; \ + clear = OpeFunc::clearC; \ + copy = OpeFunc::copyC; \ + negP = OpeFunc::negC; \ + invOp = OpeFunc::invOp; \ + addP = OpeFunc::addC; \ + subP = OpeFunc::subC; \ + mulPreP = OpeFunc::mulPreC; \ + modP = OpeFunc::modC; \ SET_OP_LLVM(n) #ifdef USE_MONT_FP -const size_t UnitByteN = sizeof(Unit); -const size_t maxUnitN = (MCL_FP_BLOCK_MAX_BIT_N + UnitByteN * 8 - 1) / (UnitByteN * 8); inline void invOpForMont(Unit *y, const Unit *x, const Op& op) { Unit r[maxUnitN]; @@ -158,11 +158,11 @@ inline void invOpForMont(Unit *y, const Unit *x, const Op& op) */ op.mul(y, r, op.invTbl.data() + k * op.N); } -inline void fromRawGmp(Unit *y, size_t n, const mpz_class& x) +static void fromRawGmp(Unit *y, size_t n, const mpz_class& x) { local::toArray(y, n, x.get_mpz_t()); } -inline void initInvTbl(Op& op, size_t N) +static void initInvTbl(Op& op, size_t N) { assert(N <= maxUnitN); const size_t invTblN = N * sizeof(Unit) * 8 * 2; @@ -177,7 +177,7 @@ inline void initInvTbl(Op& op, size_t N) } } -inline void initForMont(Op& op, const Unit *p) +static void initForMont(Op& op, const Unit *p) { size_t N = (op.bitLen + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8); if (N < 2) N = 2; @@ -203,7 +203,7 @@ inline void initForMont(Op& op, const Unit *p) #endif -void setOp(mcl::fp::Op& op, const Unit* p, size_t bitLen) +void Op::init(const Unit* p, size_t bitLen) { assert(sizeof(mp_limb_t) == sizeof(Unit)); const size_t UnitBitN = sizeof(Unit) * 8; @@ -241,12 +241,12 @@ void setOp(mcl::fp::Op& op, const Unit* p, size_t bitLen) #endif #ifdef MCL_USE_LLVM - if (op.mp == mpz_class("0xfffffffffffffffffffffffffffffffeffffffffffffffff")) { - op.mul = &mcl_fp_mul_NIST_P192; // slower than MontFp192 + if (mp == mpz_class("0xfffffffffffffffffffffffffffffffeffffffffffffffff")) { + mul = &mcl_fp_mul_NIST_P192; // slower than MontFp192 } #endif #ifdef USE_MONT_FP - fp::initForMont(op, p); + fp::initForMont(*this, p); #endif } diff --git a/test/mont_fp_test.cpp b/test/mont_fp_test.cpp index e4843c5..25a6ced 100644 --- a/test/mont_fp_test.cpp +++ b/test/mont_fp_test.cpp @@ -24,7 +24,7 @@ struct Montgomery { explicit Montgomery(const mpz_class& p) { p_ = p; - pp_ = mcl::montgomery::getCoff(mcl::Gmp::getBlock(p, 0)); + pp_ = mcl::fp::getMontgomeryCoeff(mcl::Gmp::getBlock(p, 0)); pn_ = mcl::Gmp::getBlockSize(p); R_ = 1; R_ = (R_ << (pn_ * 64)) % p_;