#include #define MCL_USE_LLVM #include #include #include #include #include #include #include #include #if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64) #define USE_XBYAK static mcl::FpGenerator fg; #endif #define PUT(x) std::cout << #x "=" << (x) << std::endl const size_t MAX_N = 32; typedef mcl::fp::Unit Unit; size_t getUnitN(size_t bitLen) { return (bitLen + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8); } void setMpz(mpz_class& mx, const Unit *x, size_t n) { mcl::Gmp::setRaw(mx, x, n); } void getMpz(Unit *x, size_t n, const mpz_class& mx) { mcl::fp::toArray(x, n, mx.get_mpz_t()); } struct Montgomery { mpz_class p_; mpz_class R_; // (1 << (n_ * 64)) % p mpz_class RR_; // (R * R) % p Unit r_; // p * r = -1 mod M = 1 << 64 size_t n_; Montgomery() {} explicit Montgomery(const mpz_class& p) { p_ = p; r_ = mcl::montgomery::getCoff(mcl::Gmp::getBlock(p, 0)); n_ = mcl::Gmp::getBlockSize(p); R_ = 1; R_ = (R_ << (n_ * 64)) % p_; RR_ = (R_ * R_) % p_; } void toMont(mpz_class& x) const { mul(x, x, RR_); } void fromMont(mpz_class& x) const { mul(x, x, 1); } void mont(Unit *z, const Unit *x, const Unit *y) const { mpz_class mx, my; setMpz(mx, x, n_); setMpz(my, y, n_); mul(mx, mx, my); getMpz(z, n_, mx); } void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) const { #if 1 const size_t ySize = mcl::Gmp::getBlockSize(y); mpz_class c = y == 0 ? mpz_class(0) : x * mcl::Gmp::getBlock(y, 0); Unit q = c == 0 ? 0 : mcl::Gmp::getBlock(c, 0) * r_; c += p_ * q; c >>= sizeof(Unit) * 8; for (size_t i = 1; i < n_; i++) { if (i < ySize) { c += x * mcl::Gmp::getBlock(y, i); } Unit q = c == 0 ? 0 : mcl::Gmp::getBlock(c, 0) * r_; c += p_ * q; c >>= sizeof(Unit) * 8; } if (c >= p_) { c -= p_; } z = c; #else z = x * y; const size_t zSize = mcl::Gmp::getBlockSize(z); for (size_t i = 0; i < n_; i++) { if (i < zSize) { Unit q = mcl::Gmp::getBlock(z, 0) * r_; z += p_ * (mp_limb_t)q; } z >>= sizeof(Unit) * 8; } if (z >= p_) { z -= p_; } #endif } }; void put(const char *msg, const Unit *x, size_t n) { printf("%s ", msg); for (size_t i = 0; i < n; i++) printf("%016llx ", (long long)x[n - 1 - i]); printf("\n"); } void verifyEqual(const Unit *x, const Unit *y, size_t n, const char *file, int line) { bool ok = mcl::fp::isEqualArray(x, y, n); CYBOZU_TEST_ASSERT(ok); if (ok) return; printf("%s:%d\n", file, line); put("L", x, n); put("R", y, n); exit(1); } #define VERIFY_EQUAL(x, y, n) verifyEqual(x, y, n, __FILE__, __LINE__) void addC(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) { mpz_class mx, my, mp; setMpz(mx, x, n); setMpz(my, y, n); setMpz(mp, p, n); mx += my; if (mx >= mp) mx -= mp; getMpz(z, n, mx); } void subC(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) { mpz_class mx, my, mp; setMpz(mx, x, n); setMpz(my, y, n); setMpz(mp, p, n); mx -= my; if (mx < 0) mx += mp; getMpz(z, n, mx); } static inline void set_zero(mpz_t& z, Unit *p, size_t n) { z->_mp_alloc = (int)n; z->_mp_size = 0; z->_mp_d = (mp_limb_t*)p; } static inline void set_mpz_t(mpz_t& z, const Unit* p, int n) { z->_mp_alloc = n; int i = n; while (i > 0 && p[i - 1] == 0) { i--; } z->_mp_size = i; z->_mp_d = (mp_limb_t*)p; } // z[2n] <- x[n] * y[n] void mulPreC(Unit *z, const Unit *x, const Unit *y, size_t n) { #if 1 mpz_t mx, my, mz; set_zero(mz, z, n * 2); set_mpz_t(mx, x, n); set_mpz_t(my, y, n); mpz_mul(mz, mx, my); mcl::fp::toArray(z, n * 2, mz); #else mpz_class mx, my; setMpz(mx, x, n); setMpz(my, y, n); mx *= my; getMpz(z, n * 2, mx); #endif } void modC(Unit *y, const Unit *x, const Unit *p, size_t n) { mpz_t mx, my, mp; set_mpz_t(mx, x, n * 2); set_mpz_t(my, y, n); set_mpz_t(mp, p, n); mpz_mod(my, mx, mp); mcl::fp::clearArray(y, my->_mp_size, n); } void mul(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) { Unit ret[MAX_N * 2]; mpz_t mx, my, mz, mp; set_zero(mz, ret, MAX_N * 2); set_mpz_t(mx, x, n); set_mpz_t(my, y, n); set_mpz_t(mp, p, n); mpz_mul(mz, mx, my); mpz_mod(mz, mz, mp); mcl::fp::toArray(z, n, mz); } typedef mcl::fp::void3op void3op; typedef mcl::fp::void4op void4op; typedef mcl::fp::void4Iop void4Iop; const struct FuncOp { size_t bitLen; void4op addS; void4op addL; void4op subS; void4op subL; void3op mulPre; void4Iop mont; } gFuncOpTbl[] = { { 128, mcl_fp_add128S, mcl_fp_add128L, mcl_fp_sub128S, mcl_fp_sub128L, mcl_fp_mul128pre, mcl_fp_mont128 }, { 192, mcl_fp_add192S, mcl_fp_add192L, mcl_fp_sub192S, mcl_fp_sub192L, mcl_fp_mul192pre, mcl_fp_mont192 }, { 256, mcl_fp_add256S, mcl_fp_add256L, mcl_fp_sub256S, mcl_fp_sub256L, mcl_fp_mul256pre, mcl_fp_mont256 }, { 320, mcl_fp_add320S, mcl_fp_add320L, mcl_fp_sub320S, mcl_fp_sub320L, mcl_fp_mul320pre, mcl_fp_mont320 }, { 384, mcl_fp_add384S, mcl_fp_add384L, mcl_fp_sub384S, mcl_fp_sub384L, mcl_fp_mul384pre, mcl_fp_mont384 }, { 448, mcl_fp_add448S, mcl_fp_add448L, mcl_fp_sub448S, mcl_fp_sub448L, mcl_fp_mul448pre, mcl_fp_mont448 }, { 512, mcl_fp_add512S, mcl_fp_add512L, mcl_fp_sub512S, mcl_fp_sub512L, mcl_fp_mul512pre, mcl_fp_mont512 }, #if CYBOZU_OS_BIT == 32 { 160, mcl_fp_add160S, mcl_fp_add160L, mcl_fp_sub160S, mcl_fp_sub160L, mcl_fp_mul160pre, mcl_fp_mont160 }, { 224, mcl_fp_add224S, mcl_fp_add224L, mcl_fp_sub224S, mcl_fp_sub224L, mcl_fp_mul224pre, mcl_fp_mont224 }, { 288, mcl_fp_add288S, mcl_fp_add288L, mcl_fp_sub288S, mcl_fp_sub288L, mcl_fp_mul288pre, mcl_fp_mont288 }, { 352, mcl_fp_add352S, mcl_fp_add352L, mcl_fp_sub352S, mcl_fp_sub352L, mcl_fp_mul352pre, mcl_fp_mont352 }, { 416, mcl_fp_add416S, mcl_fp_add416L, mcl_fp_sub416S, mcl_fp_sub416L, mcl_fp_mul416pre, mcl_fp_mont416 }, { 480, mcl_fp_add480S, mcl_fp_add480L, mcl_fp_sub480S, mcl_fp_sub480L, mcl_fp_mul480pre, mcl_fp_mont480 }, { 544, mcl_fp_add544S, mcl_fp_add544L, mcl_fp_sub544S, mcl_fp_sub544L, mcl_fp_mul544pre, mcl_fp_mont544 }, #else { 576, mcl_fp_add576S, mcl_fp_add576L, mcl_fp_sub576S, mcl_fp_sub576L, mcl_fp_mul576pre, mcl_fp_mont576 }, #endif }; FuncOp getFuncOp(size_t bitLen) { typedef std::map Map; static Map map; static bool init = false; if (!init) { init = true; for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(gFuncOpTbl); i++) { map[gFuncOpTbl[i].bitLen] = gFuncOpTbl[i]; } } for (Map::const_iterator i = map.begin(), ie = map.end(); i != ie; ++i) { if (bitLen <= i->second.bitLen) { return i->second; } } printf("ERR bitLen=%d\n", (int)bitLen); exit(1); } void test(const Unit *p, size_t bitLen) { printf("bitLen %d\n", (int)bitLen); const size_t n = getUnitN(bitLen); #ifdef NDEBUG bool doBench = true; #else bool doBench = false; #endif const FuncOp funcOp = getFuncOp(bitLen); const void4op addS = funcOp.addS; const void4op addL = funcOp.addL; const void4op subS = funcOp.subS; const void4op subL = funcOp.subL; const void3op mulPre = funcOp.mulPre; const void4Iop mont = funcOp.mont; mcl::fp::Unit x[MAX_N], y[MAX_N]; mcl::fp::Unit z[MAX_N], w[MAX_N]; mcl::fp::Unit z2[MAX_N * 2]; mcl::fp::Unit w2[MAX_N * 2]; cybozu::XorShift rg; mcl::fp::getRandVal(x, rg, p, bitLen); mcl::fp::getRandVal(y, rg, p, bitLen); const size_t C = 10; addC(z, x, y, p, n); addS(w, x, y, p); VERIFY_EQUAL(z, w, n); for (size_t i = 0; i < C; i++) { addC(z, y, z, p, n); addS(w, y, w, p); VERIFY_EQUAL(z, w, n); addC(z, y, z, p, n); addL(w, y, w, p); VERIFY_EQUAL(z, w, n); subC(z, x, z, p, n); subS(w, x, w, p); VERIFY_EQUAL(z, w, n); subC(z, x, z, p, n); subL(w, x, w, p); VERIFY_EQUAL(z, w, n); mulPreC(z2, x, z, n); mulPre(w2, x, z); VERIFY_EQUAL(z2, w2, n * 2); } { mpz_class mp; setMpz(mp, p, n); Montgomery m(mp); #ifdef USE_XBYAK if (bitLen > 128) fg.init(p, n); #endif /* real mont 0 0 1 R^-1 R 1 -1 -R^-1 -R -1 */ mpz_class t = 1; const mpz_class R = (t << (n * 64)) % mp; const mpz_class tbl[] = { 0, 1, R, mp - 1, mp - R }; for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { const mpz_class& mx = tbl[i]; for (size_t j = i; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) { const mpz_class& my = tbl[j]; getMpz(x, n, mx); getMpz(y, n, my); m.mont(z, x, y); mont(w, x, y, p, m.r_); VERIFY_EQUAL(z, w, n); #ifdef USE_XBYAK if (bitLen > 128) { fg.mul_(w, x, y); VERIFY_EQUAL(z, w, n); } #endif } } if (doBench) { // CYBOZU_BENCH("montC", m.mont, x, y, x); CYBOZU_BENCH("montA ", mont, x, y, x, p, m.r_); } } if (doBench) { // CYBOZU_BENCH("addS", addS, x, y, x, p); // slow // CYBOZU_BENCH("subS", subS, x, y, x, p); // CYBOZU_BENCH("addL", addL, x, y, x, p); // CYBOZU_BENCH("subL", subL, x, y, x, p); CYBOZU_BENCH("mulPreA", mulPre, w2, y, x); CYBOZU_BENCH("mulPreC", mulPreC, w2, y, x, n); CYBOZU_BENCH("modC ", modC, x, w2, p, n); } #ifdef USE_XBYAK if (bitLen <= 128) return; if (doBench) { fg.init(p, n); CYBOZU_BENCH("addA ", fg.add_, x, y, x); CYBOZU_BENCH("subA ", fg.sub_, x, y, x); // CYBOZU_BENCH("mulA", fg.mul_, x, y, x); } #endif printf("mont test %d\n", (int)bitLen); } CYBOZU_TEST_AUTO(all) { const struct { size_t n; const uint64_t p[9]; } tbl[] = { // { 2, { 0xf000000000000001, 1, } }, { 2, { 0x000000000000001d, 0x8000000000000000, } }, { 3, { 0x000000000000012b, 0x0000000000000000, 0x0000000080000000, } }, // { 3, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x07ffffffffffffff, } }, // { 3, { 0x7900342423332197, 0x1234567890123456, 0x1480948109481904, } }, { 3, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0xffffffffffffffff, } }, // { 4, { 0x7900342423332197, 0x4242342420123456, 0x1234567892342342, 0x1480948109481904, } }, // { 4, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x17ffffffffffffff, 0x1513423423423415, } }, { 4, { 0xa700000000000013, 0x6121000000000013, 0xba344d8000000008, 0x2523648240000001, } }, // { 5, { 0x0000000000000009, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, } }, { 5, { 0xfffffffffffffc97, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, // { 6, { 0x4720422423332197, 0x0034230847204720, 0x3456789012345679, 0x4820984290482212, 0x9482094820948209, 0x0194810841094810, } }, // { 6, { 0x7204224233321972, 0x0342308472047204, 0x4567890123456790, 0x0948204204243123, 0x2098420984209482, 0x2093482094810948, } }, { 6, { 0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, // { 7, { 0x0000000000000063, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, } }, { 7, { 0x000000000fffcff1, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, { 8, { 0xffffffffffffd0c9, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, { 9, { 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x00000000000001ff, } }, // { 9, { 0x4720422423332197, 0x0034230847204720, 0x3456789012345679, 0x2498540975555312, 0x9482904924029424, 0x0948209842098402, 0x1098410948109482, 0x0820958209582094, 0x0000000000000029, } }, // { 9, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x7fffffffffffffff, 0x8572938572398583, 0x5732057823857293, 0x9820948205872380, 0x3409238420492034, 0x9483842098340298, 0x0000000000000003, } }, }; for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { const size_t n = tbl[i].n; const size_t bitLen = (n - 1) * 64 + cybozu::bsr(tbl[i].p[n - 1]) + 1; test((const Unit*)tbl[i].p, bitLen); } }