Patch summary (free-form text ahead of the first "diff --git" header; not part of any hunk):
  .github/workflows/main.yml  print lscpu, download the Intel SDE archive, build bin/bn_test.exe and run it under sde64 -hsw, use -j4, dump the dmesg tail when test_ci fails (the step still fails: exit 1), install openjdk-8-jdk non-interactively (-y) and run the ffi/java tests
  include/mcl/gmp_util.hpp    SmallModp::N_ changes from int to uint32_t, with an explicit cast in init()
  misc/snark-p.py             print hex(x * 9) for a fixed constant x
  src/fp_generator.hpp        gen_fpDbl_mod4 takes the NF path only when useMulx_ and useAdx_ are also set; gen_fpDbl_mulPre changes its early-return condition; local Pack t is renamed to t2
  test/bench.hpp              wrap `mpz_class a` and the benchmarks that use it in their own scope
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 575892f..b7c5964 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,8 +7,15 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - run: make test_ci DEBUG=1 -j3 + - run: lscpu + - run: wget https://software.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.63.0-2021-01-18-lin.tar.bz2 + - run: bzip2 -dc sde-external-8.63.0-2021-01-18-lin.tar.bz2 | tar xvf - + - run: make bin/bn_test.exe DEBUG=1 -j4 + - run: sde-external-8.63.0-2021-01-18-lin/sde64 -hsw -- bin/bn_test.exe + - run: make test_ci DEBUG=1 -j4 || (dmesg | tail; exit 1) - run: make clean - - run: make test_ci DEBUG=1 -j3 CXX=clang++ + - run: make test_ci DEBUG=1 -j4 CXX=clang++ || (dmesg | tail; exit 1) - run: make clean - run: make test_go + - run: sudo apt install -y openjdk-8-jdk + - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp index c5e9700..f3fcfa3 100644 --- a/include/mcl/gmp_util.hpp +++ b/include/mcl/gmp_util.hpp @@ -951,7 +951,7 @@ struct SmallModp { static const size_t maxTblSize = (MCL_MAX_BIT_SIZE + unitBitSize - 1) / unitBitSize + 1; static const size_t maxMulN = 9; static const size_t pMulTblN = maxMulN + 1; - int N_; + uint32_t N_; uint32_t shiftL_; uint32_t shiftR_; uint32_t maxIdx_; @@ -997,7 +997,7 @@ struct SmallModp { void init(const mpz_class& p) { size_t pBitSize = mcl::gmp::getBitSize(p); - N_ = (pBitSize + unitBitSize - 1) / unitBitSize; + N_ = uint32_t((pBitSize + unitBitSize - 1) / unitBitSize); shiftR_ = (pBitSize - 1) % unitBitSize; shiftL_ = unitBitSize - shiftR_; mpz_class t = 0; diff --git a/misc/snark-p.py b/misc/snark-p.py index 8168f3b..cbb7f5a 100644 --- a/misc/snark-p.py +++ b/misc/snark-p.py @@ -11,3 +11,8 @@ print("maxarg") for i in range(16): print(i, maxarg(i << 253)) + +x=0x2c130429c1d4802eb8703197d038ebd5109f96aee333bd027963094f5bb33ad + +y = 
x * 9 +print(hex(y)) diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index ecd022c..d420980 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -1095,7 +1095,7 @@ private: */ void gen_fpDbl_mod4(const Reg64& z, const Reg64& xy, const Pack& t) { - if (!isFullBit_) { + if (!isFullBit_ && useMulx_ && useAdx_) { gen_fpDbl_mod4NF(z, xy, t); return; } @@ -2547,7 +2547,7 @@ private: } void gen_fpDbl_mulPre(void3u& f) { - if (!(useMulx_ && useAdx_)) return; + if (!useMulx_ || (pn_ == 6 && !useAdx_)) return; void3u func = getCurr(); switch (pn_) { case 2: @@ -3656,25 +3656,25 @@ private: } { - Pack t = sf.t; + Pack t2 = sf.t; if (pn_ == 4) { - t = t.sub(0, pn_ * 2); + t2 = t2.sub(0, pn_ * 2); } else if (pn_ == 6) { - t.append(gp1); - t.append(gp2); + t2.append(gp1); + t2.append(gp2); } - assert((int)t.size() == pn_ * 2); + assert((int)t2.size() == pn_ * 2); mov(gp0, ptr [z]); - load_rm(t, gp0 + FpByte_ * 2); - sub_rm(t, gp0); // d1 -= d0 - sub_rm(t, (RegExp)d2); // d1 -= d2 - store_mr(gp0 + FpByte_ * 2, t); + load_rm(t2, gp0 + FpByte_ * 2); + sub_rm(t2, gp0); // d1 -= d0 + sub_rm(t2, (RegExp)d2); // d1 -= d2 + store_mr(gp0 + FpByte_ * 2, t2); gen_raw_sub(gp0, gp0, d2, rax, pn_); const RegExp& d0H = gp0 + pn_ * 8; const RegExp& d2H = (RegExp)d2 + pn_ * 8; - gen_raw_fp_sub(d0H, d0H, d2H, t, true); + gen_raw_fp_sub(d0H, d0H, d2H, t2, true); } add(rsp, SS); ret(); diff --git a/test/bench.hpp b/test/bench.hpp index 11ced31..d407bc7 100644 --- a/test/bench.hpp +++ b/test/bench.hpp @@ -85,16 +85,18 @@ void testBench(const G1& P, const G2& Q) const int C3 = 100000; #if 1 const int C2 = 3000; - mpz_class a = x.getMpz(); - CYBOZU_BENCH_C("G1::mulCT ", C, G1::mulCT, Pa, P, a); - CYBOZU_BENCH_C("G1::mul ", C, G1::mul, Pa, Pa, a); - CYBOZU_BENCH_C("G1::add ", C, G1::add, Pa, Pa, P); - CYBOZU_BENCH_C("G1::dbl ", C, G1::dbl, Pa, Pa); - CYBOZU_BENCH_C("G2::mulCT ", C, G2::mulCT, Qa, Q, a); - CYBOZU_BENCH_C("G2::mul ", C, G2::mul, Qa, Qa, a); - CYBOZU_BENCH_C("G2::add 
", C, G2::add, Qa, Qa, Q); - CYBOZU_BENCH_C("G2::dbl ", C, G2::dbl, Qa, Qa); - CYBOZU_BENCH_C("GT::pow ", C, GT::pow, e1, e1, a); + { + mpz_class a = x.getMpz(); + CYBOZU_BENCH_C("G1::mulCT ", C, G1::mulCT, Pa, P, a); + CYBOZU_BENCH_C("G1::mul ", C, G1::mul, Pa, Pa, a); + CYBOZU_BENCH_C("G1::add ", C, G1::add, Pa, Pa, P); + CYBOZU_BENCH_C("G1::dbl ", C, G1::dbl, Pa, Pa); + CYBOZU_BENCH_C("G2::mulCT ", C, G2::mulCT, Qa, Q, a); + CYBOZU_BENCH_C("G2::mul ", C, G2::mul, Qa, Qa, a); + CYBOZU_BENCH_C("G2::add ", C, G2::add, Qa, Qa, Q); + CYBOZU_BENCH_C("G2::dbl ", C, G2::dbl, Qa, Qa); + CYBOZU_BENCH_C("GT::pow ", C, GT::pow, e1, e1, a); + } // CYBOZU_BENCH_C("GT::powGLV ", C, BN::param.glv2.pow, e1, e1, a); G1 PP; G2 QQ;