diff --git a/misc/low_test.cpp b/misc/low_test.cpp index 7163902..62f84af 100644 --- a/misc/low_test.cpp +++ b/misc/low_test.cpp @@ -1,4 +1,14 @@ #include +#include + +void dump(const char *msg, const uint32_t *x, size_t n) +{ + printf("%s", msg); + for (size_t i = 0; i < n; i++) { + printf("%08x", x[n - 1 - i]); + } + printf("\n"); +} #include "../src/low_funct.hpp" #define MCL_USE_VINT @@ -9,6 +19,7 @@ #include #include #include +#include template void setRand(uint32_t *x, size_t n, RG& rg) @@ -59,3 +70,82 @@ CYBOZU_TEST_AUTO(mulT) mulTest<8>(); mulTest<12>(); } + +struct Montgomery { + mcl::Vint p_; + mcl::Vint R_; // (1 << (pn_ * 64)) % p + mcl::Vint RR_; // (R * R) % p + uint32_t rp_; // rp * p = -1 mod M = 1 << 64 + size_t pn_; + Montgomery() {} + explicit Montgomery(const mcl::Vint& p) + { + p_ = p; + rp_ = mcl::fp::getMontgomeryCoeff(p.getUnit()[0]); + pn_ = p.getUnitSize(); + R_ = 1; + R_ = (R_ << (pn_ * 64)) % p_; + RR_ = (R_ * R_) % p_; + } + + void toMont(mcl::Vint& x) const { mul(x, x, RR_); } + void fromMont(mcl::Vint& x) const { mul(x, x, 1); } + + void mul(mcl::Vint& z, const mcl::Vint& x, const mcl::Vint& y) const + { + const size_t ySize = y.getUnitSize(); + mcl::Vint c = x * y.getUnit()[0]; + uint32_t q = c.getUnit()[0] * rp_; + c += p_ * q; + c >>= sizeof(uint32_t) * 8; + for (size_t i = 1; i < pn_; i++) { + if (i < ySize) { + c += x * y.getUnit()[i]; + } + uint32_t q = c.getUnit()[0] * rp_; + c += p_ * q; + c >>= sizeof(uint32_t) * 8; + } + if (c >= p_) { + c -= p_; + } + z = c; + } +}; + +template +void montTest(const char *pStr) +{ + mcl::Vint vp; + vp.setStr(pStr); + Montgomery mont(vp); + + cybozu::XorShift rg; + uint32_t x[N]; + uint32_t y[N]; + uint32_t z[N]; + uint32_t _p[N + 1]; + uint32_t *const p = _p + 1; + vp.getArray(p, N); + p[-1] = mont.rp_; + + for (size_t i = 0; i < 1000; i++) { + setRand(x, N, rg); + setRand(y, N, rg); + // remove MSB + x[N - 1] &= 0x7fffffff; + y[N - 1] &= 0x7fffffff; + mcl::Vint vx, vy, vz; + vx.setArray(x, N); + vy.setArray(y, N); + mont.mul(vz, vx, vy); + mcl::montT(z, x, y, p); + CYBOZU_TEST_EQUAL_ARRAY(z, vz.getUnit(), N); + } +} + +CYBOZU_TEST_AUTO(mont) +{ + const char *pStr = "0x2523648240000001ba344d80000000086121000000000013a700000000000013"; + montTest<8>(pStr); +} diff --git a/src/low_funct.hpp b/src/low_funct.hpp index 919c574..23e5885 100644 --- a/src/low_funct.hpp +++ b/src/low_funct.hpp @@ -4,12 +4,13 @@ @author MITSUNARI Shigeo(@herumi) @license modified new BSD license http://opensource.org/licenses/BSD-3-Clause + @note for only 32bit not full bit prime version + assert((p[N - 1] & 0x80000000) == 0); */ #include #include #include -// only for 32bit not full bit prime version namespace mcl { @@ -159,5 +160,28 @@ void subModT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t * } } +/* + z[N] = Montgomery(x[N], y[N], p[N]) + @remark : assume p[-1] = rp +*/ +template +void montT(uint32_t *z, const uint32_t *x, const uint32_t *y, const uint32_t *p) +{ + const uint32_t rp = p[-1]; + assert((p[N - 1] & 0x80000000) == 0); + uint32_t buf[N * 2]; + buf[N] = mulUnitT(buf, x, y[0]); + uint32_t q = buf[0] * rp; + buf[N] += addMulUnitT(buf, p, q); + for (size_t i = 1; i < N; i++) { + buf[N + i] = addMulUnitT(buf + i, x, y[i]); + uint32_t q = buf[i] * rp; + buf[N + i] += addMulUnitT(buf + i, p, q); + } + if (subT(z, buf + N, p)) { + copyT(z, buf + N); + } +} + } // mcl