diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..575892f --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,14 @@ +name: test +on: [push] + +jobs: + build: + name: test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: make test_ci DEBUG=1 -j3 + - run: make clean + - run: make test_ci DEBUG=1 -j3 CXX=clang++ + - run: make clean + - run: make test_go diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 73a97e6..0000000 --- a/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -sudo: true -dist: trusty -language: cpp -compiler: - - gcc - - clang -addons: - apt: - packages: - - libgmp-dev -script: - - make test_ci DEBUG=1 -j3 - - make clean - - make test_ci CFLAGS_USER=-DMCL_DONT_USE_XBYAK -j3 - - make clean - - make test_go - diff --git a/common.mk b/common.mk index 707aa35..c42d1ca 100644 --- a/common.mk +++ b/common.mk @@ -91,7 +91,7 @@ else CFLAGS_OPT+=-O3 else ifeq ($(shell expr $(GCC_VER) \> 4.6.0),1) - CFLAGS_OPT+=-Ofast + CFLAGS_OPT+=-O3 else CFLAGS_OPT+=-O3 endif diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp index d8a0fc1..13bfe5f 100644 --- a/include/mcl/ec.hpp +++ b/include/mcl/ec.hpp @@ -301,7 +301,7 @@ void dblJacobi(E& R, const E& P) xy += xy; // 4xy^2 switch (E::specialA_) { case Zero: - F::add(t, x2, x2); + F::mul2(t, x2); x2 += t; break; case Minus3: @@ -312,7 +312,7 @@ void dblJacobi(E& R, const E& P) F::sqr(t, t); x2 -= t; } - F::add(t, x2, x2); + F::mul2(t, x2); x2 += t; break; case GenericA: @@ -325,7 +325,7 @@ void dblJacobi(E& R, const E& P) t *= E::a_; } t += x2; - x2 += x2; + F::mul2(x2, x2); x2 += t; break; } @@ -337,12 +337,12 @@ void dblJacobi(E& R, const E& P) } else { F::mul(R.z, P.y, P.z); } - R.z += R.z; + F::mul2(R.z, R.z); F::sub(R.y, xy, R.x); R.y *= x2; - y2 += y2; - y2 += y2; - y2 += y2; + F::mul2(y2, y2); + F::mul2(y2, y2); + F::mul2(y2, y2); R.y -= y2; } diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index d49b6be..b85b691 
100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -165,6 +165,8 @@ public: if (sqr == 0) sqr = sqrC; mul2 = fp::func_ptr_cast(op_.fp_mul2A_); if (mul2 == 0) mul2 = mul2C; + mul9 = fp::func_ptr_cast(op_.fp_mul9A_); + if (mul9 == 0) mul9 = mul9C; #endif *pb = true; } @@ -499,6 +501,8 @@ public: static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); } static void (*mul2)(FpT& y, const FpT& x); static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); } + static void (*mul9)(FpT& y, const FpT& x); + static inline void mul9C(FpT& y, const FpT& x) { mulSmall(y, x, 9); } #else static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); } static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); } @@ -506,9 +510,20 @@ public: static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); } static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); } static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); } + static inline void mul9(FpT& y, const FpT& x) { mulSmall(y, x, 9); } #endif static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); } static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); } + static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) /* z = x * y mod p for a small multiplier y; y must not exceed SmallModp::maxMulN */ + { + assert(y <= op_.smallModp.maxMulN); + Unit xy[maxSize + 1]; /* maxSize + 1 Units for the unreduced product x * y */ + op_.fp_mulUnitPre(xy, x.v_, y); + const uint32_t v = op_.smallModp.approxMul(xy); /* table estimate of xy / p; kept in the unsigned type approxMul returns */ + const Unit *pv = op_.smallModp.getPmul(v); /* precomputed multiple v * p */ + op_.fp_subPre(z.v_, xy, pv); /* z = xy - v * p, computed without passing the modulus */ + op_.fp_sub(z.v_, z.v_, op_.p, op_.p); /* subtracts p through the modular subtract; NOTE(review): presumably leaves z in [0, p) - confirm against fp_sub and for full-bit p */ + } static inline void mulUnit(FpT& z, const FpT& x, const Unit y) { if (mulSmallUnit(z, x, y)) return; @@ -746,6 +761,7 @@ template void (*FpT::neg)(FpT& y, template void (*FpT::mul)(FpT& z, const FpT& x, const FpT& y); template void (*FpT::sqr)(FpT& y, const FpT& x); 
template void (*FpT::mul2)(FpT& y, const FpT& x); +template void (*FpT::mul9)(FpT& y, const FpT& x); #endif } // mcl diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp index ed0880b..c5e9700 100644 --- a/include/mcl/gmp_util.hpp +++ b/include/mcl/gmp_util.hpp @@ -942,6 +942,85 @@ public: #endif }; +/* + x mod p for a small value x <= (maxMulN * p). +*/ +struct SmallModp { + typedef mcl::fp::Unit Unit; + static const size_t unitBitSize = sizeof(Unit) * 8; + static const size_t maxTblSize = (MCL_MAX_BIT_SIZE + unitBitSize - 1) / unitBitSize + 1; + static const size_t maxMulN = 9; + static const size_t pMulTblN = maxMulN + 1; + int N_; /* number of Units needed to hold p */ + uint32_t shiftL_; /* unitBitSize - shiftR_ */ + uint32_t shiftR_; /* (pBitSize - 1) % unitBitSize */ + uint32_t maxIdx_; /* getTop(maxMulN * p), the largest index of idxTbl_ that init() fills */ + // pMulTbl_[i] = p * i (the full value, not shifted) + Unit pMulTbl_[pMulTblN][maxTblSize]; + // idxTbl_[x] = (x << (pBitSize - 1)) / p + uint8_t idxTbl_[pMulTblN * 2]; + SmallModp() + : N_(0) + , shiftL_(0) + , shiftR_(0) + , maxIdx_(0) + , pMulTbl_() + , idxTbl_() + { + } + // return v = idxTbl_[getTop(x)], an estimate of x / p that satisfies v * p <= x + uint32_t approxMul(const Unit *x) const + { + uint32_t top = getTop(x); + assert(top <= maxIdx_); + return idxTbl_[top]; + } + const Unit *getPmul(size_t v) const + { + assert(v < pMulTblN); + return pMulTbl_[v]; + } + // return x >> (pBitSize - 1); if shiftR_ == 0 then shiftL_ == unitBitSize and shifting by the full width is undefined, so x[N_ - 1] is returned as is + uint32_t getTop(const Unit *x) const + { + return shiftR_ == 0 ? uint32_t(x[N_ - 1]) : uint32_t((x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_)); + } + uint32_t cvtInt(const mpz_class& x) const /* x as uint32_t; x must fit in one Unit */ + { + assert(mcl::gmp::getUnitSize(x) <= 1); + if (x == 0) { + return 0; + } else { + return uint32_t(mcl::gmp::getUnit(x)[0]); + } + } + void init(const mpz_class& p) /* fill N_, the shifts, pMulTbl_ and idxTbl_ for the modulus p */ + { + size_t pBitSize = mcl::gmp::getBitSize(p); + N_ = (pBitSize + unitBitSize - 1) / unitBitSize; + shiftR_ = (pBitSize - 1) % unitBitSize; + shiftL_ = unitBitSize - shiftR_; + mpz_class t = 0; + for (size_t i = 0; i < pMulTblN; i++) { + bool b; + mcl::gmp::getArray(&b, pMulTbl_[i], maxTblSize, t); + assert(b); + (void)b; + if (i == pMulTblN - 1) { + maxIdx_ = getTop(pMulTbl_[i]); + assert(maxIdx_ < 
CYBOZU_NUM_OF_ARRAY(idxTbl_)); + break; + } + t += p; + } + + for (uint32_t i = 0; i <= maxIdx_; i++) { + idxTbl_[i] = cvtInt((mpz_class(int(i)) << (pBitSize - 1)) / p); + } + } +}; + + /* Barrett Reduction for non GMP version diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 13fd0c5..4fa8941 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -191,6 +191,7 @@ struct Op { uint32_t pmod4; mcl::SquareRoot sq; mcl::Modp modp; + mcl::SmallModp smallModp; Unit half[maxUnitSize]; // (p + 1) / 2 Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery) /* @@ -215,6 +216,7 @@ struct Op { void3u fp_mulA_; void2u fp_sqrA_; void2u fp_mul2A_; + void2u fp_mul9A_; void3u fp2_addA_; void3u fp2_subA_; void2u fp2_negA_; @@ -304,6 +306,7 @@ struct Op { fp_mulA_ = 0; fp_sqrA_ = 0; fp_mul2A_ = 0; + fp_mul9A_ = 0; fp2_addA_ = 0; fp2_subA_ = 0; fp2_negA_ = 0; diff --git a/misc/snark-p.py b/misc/snark-p.py new file mode 100644 index 0000000..8168f3b --- /dev/null +++ b/misc/snark-p.py @@ -0,0 +1,13 @@ +p=21888242871839275222246405745257275088696311157297823662689037894645226208583 + +print("over 253 bit") +for i in range (10): + print(i, (p * i) >> 253) + +def maxarg(x): + return x // p + +print("maxarg") +for i in range(16): + print(i, maxarg(i << 253)) + diff --git a/readme.md b/readme.md index a134584..5c9f799 100644 --- a/readme.md +++ b/readme.md @@ -1,4 +1,4 @@ -[![Build Status](https://api.travis-ci.com/herumi/mcl.svg?branch=master)](https://travis-ci.com/github/herumi/mcl) +[![Build Status](https://github.com/herumi/mcl/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/mcl/actions/workflows/main.yml) # mcl diff --git a/src/fp.cpp b/src/fp.cpp index 3d7eed3..9f3c47c 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -639,6 +639,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size if (!b) return false; } modp.init(mp); + smallModp.init(mp); return fp::initForMont(*this, p, mode); } diff --git a/test/bench.hpp b/test/bench.hpp 
index 9a28db7..11ced31 100644 --- a/test/bench.hpp +++ b/test/bench.hpp @@ -116,6 +116,10 @@ void testBench(const G1& P, const G2& Q) CYBOZU_BENCH_C("Fp::sub ", C3, Fp::sub, x, x, y); CYBOZU_BENCH_C("Fp::add 2 ", C3, Fp::add, x, x, x); CYBOZU_BENCH_C("Fp::mul2 ", C3, Fp::mul2, x, x); + CYBOZU_BENCH_C("Fp::mulSmall8 ", C3, Fp::mulSmall, x, x, 8); + CYBOZU_BENCH_C("Fp::mulUnit8 ", C3, Fp::mulUnit, x, x, 8); + CYBOZU_BENCH_C("Fp::mul9 ", C3, Fp::mul9, x, x); + CYBOZU_BENCH_C("Fp::mulUnit9 ", C3, Fp::mulUnit, x, x, 9); CYBOZU_BENCH_C("Fp::neg ", C3, Fp::neg, x, x); CYBOZU_BENCH_C("Fp::mul ", C3, Fp::mul, x, x, y); CYBOZU_BENCH_C("Fp::sqr ", C3, Fp::sqr, x, x); diff --git a/test/common_test.hpp b/test/common_test.hpp index 338e7d3..74a745c 100644 --- a/test/common_test.hpp +++ b/test/common_test.hpp @@ -183,8 +183,24 @@ void testFp2Dbl_mul_xi1() } } +void testMulSmall() /* compares Fp::mulSmall(z1, x, y) with x * y for every y in 0..9 */ +{ + puts("testMulSmall"); + cybozu::XorShift rg; + for (int y = 0; y < 10; y++) { /* y covers each multiplier from 0 to 9 */ + for (int i = 0; i < 40; i++) { /* 40 samples of x per multiplier */ + Fp x, z1, z2; + x.setByCSPRNG(rg); /* x is drawn from the XorShift generator rg */ + Fp::mulSmall(z1, x, y); + z2 = x * y; /* reference value from the operator* of Fp */ + CYBOZU_TEST_EQUAL(z1, z2); + } + } +} + void testCommon(const G1& P, const G2& Q) { + testMulSmall(); testFp2Dbl_mul_xi1(); testABCD(); testMul2();