Merge branch 'dev'

4 years ago · 7bfe60c537
parent 4fb3fec3db 98fc193f5f
commit 7bfe60c537
12 changed files with 155 additions and 26 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -0,0 +1,14 @@
 # CI workflow named "test"; triggered on every push.
 name: test
 on: [push]
 jobs:
  build:
    name: test
    runs-on: ubuntu-latest
    steps:
    # Check out the repository before running any make target.
    - uses: actions/checkout@v2
    # test_ci target with DEBUG=1, 3 parallel jobs, compiler left at the Makefile default.
    - run: make test_ci DEBUG=1 -j3
    - run: make clean
    # Same target again with CXX overridden to clang++.
    - run: make test_ci DEBUG=1 -j3 CXX=clang++
    - run: make clean
    # test_go target, run after a clean.
    - run: make test_go
--- a/.travis.yml
+++ b/.travis.yml
@ -1,17 +0,0 @@
 sudo: true
 dist: trusty
 language: cpp
 compiler:
  - gcc
  - clang
 addons:
  apt:
    packages:
      - libgmp-dev
 script:
  - make test_ci DEBUG=1 -j3
  - make clean
  - make test_ci CFLAGS_USER=-DMCL_DONT_USE_XBYAK -j3
  - make clean
  - make test_go
--- a/common.mk
+++ b/common.mk
@ -91,7 +91,7 @@ else
    CFLAGS_OPT+=-O3
  else
    ifeq ($(shell expr $(GCC_VER) \> 4.6.0),1)
-      CFLAGS_OPT+=-Ofast
+      CFLAGS_OPT+=-O3
    else
      CFLAGS_OPT+=-O3
    endif
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@ -301,7 +301,7 @@ void dblJacobi(E& R, const E& P)
 	xy += xy; // 4xy^2
 	switch (E::specialA_) {
 	case Zero:
-		F::add(t, x2, x2);
+		F::mul2(t, x2);
 		x2 += t;
 		break;
 	case Minus3:
@ -312,7 +312,7 @@ void dblJacobi(E& R, const E& P)
 			F::sqr(t, t);
 			x2 -= t;
 		}
-		F::add(t, x2, x2);
+		F::mul2(t, x2);
 		x2 += t;
 		break;
 	case GenericA:
@ -325,7 +325,7 @@ void dblJacobi(E& R, const E& P)
 			t *= E::a_;
 		}
 		t += x2;
-		x2 += x2;
+		F::mul2(x2, x2);
 		x2 += t;
 		break;
 	}
@ -337,12 +337,12 @@ void dblJacobi(E& R, const E& P)
 	} else {
 		F::mul(R.z, P.y, P.z);
 	}
-	R.z += R.z;
+	F::mul2(R.z, R.z);
 	F::sub(R.y, xy, R.x);
 	R.y *= x2;
-	y2 += y2;
+	F::mul2(y2, y2);
-	y2 += y2;
+	F::mul2(y2, y2);
-	y2 += y2;
+	F::mul2(y2, y2);
 	R.y -= y2;
 }
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@ -165,6 +165,8 @@ public:
 		if (sqr == 0) sqr = sqrC;
 		mul2 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul2A_);
 		if (mul2 == 0) mul2 = mul2C;
 		mul9 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul9A_);
 		if (mul9 == 0) mul9 = mul9C;
 #endif
 		*pb = true;
 	}
@ -499,6 +501,8 @@ public:
 	static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
 	static void (*mul2)(FpT& y, const FpT& x);
 	static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
 	static void (*mul9)(FpT& y, const FpT& x);
 	static inline void mul9C(FpT& y, const FpT& x) { mulSmall(y, x, 9); }
 #else
 	static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
 	static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); }
@ -506,9 +510,20 @@ public:
 	static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
 	static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
 	static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
 	static inline void mul9(FpT& y, const FpT& x) { mulSmall(y, x, 9); }
 #endif
 	static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
 	static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
 	/*
 		z = x * y for a small multiplier y (asserted: y <= op_.smallModp.maxMulN).
 		The unreduced product is formed first, a table entry chosen by
 		smallModp.approxMul() is subtracted from it, and a last fp_sub by p
 		(with modulus p) finishes the computation.
 	*/
 	static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y)
 	{
 		assert(y <= op_.smallModp.maxMulN);
 		// maxSize + 1 Units; presumably the extra Unit takes the carry of x * y
 		Unit prod[maxSize + 1];
 		op_.fp_mulUnitPre(prod, x.v_, y);
 		// table index derived from the top bits of the product
 		const int idx = op_.smallModp.approxMul(prod);
 		const Unit *const pMul = op_.smallModp.getPmul(idx);
 		op_.fp_subPre(z.v_, prod, pMul);
 		op_.fp_sub(z.v_, z.v_, op_.p, op_.p);
 	}
 	static inline void mulUnit(FpT& z, const FpT& x, const Unit y)
 	{
 		if (mulSmallUnit(z, x, y)) return;
@ -746,6 +761,7 @@ template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::neg)(FpT& y,
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul)(FpT& z, const FpT& x, const FpT& y);
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y, const FpT& x);
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul2)(FpT& y, const FpT& x);
 template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul9)(FpT& y, const FpT& x);
 #endif
 } // mcl
--- a/include/mcl/gmp_util.hpp
+++ b/include/mcl/gmp_util.hpp
@ -942,6 +942,85 @@ public:
 #endif
 };
 /*
 	x mod p for a small value x < (pMulTblN * p).
 */
 struct SmallModp {
 	typedef mcl::fp::Unit Unit;
 	static const size_t unitBitSize = sizeof(Unit) * 8;
 	static const size_t maxTblSize = (MCL_MAX_BIT_SIZE + unitBitSize - 1) / unitBitSize + 1;
 	static const size_t maxMulN = 9;
 	static const size_t pMulTblN = maxMulN + 1;
 	int N_;
 	uint32_t shiftL_;
 	uint32_t shiftR_;
 	uint32_t maxIdx_;
 	// pMulTbl_[i] = (p * i) >> (pBitSize_ - 1)
 	Unit pMulTbl_[pMulTblN][maxTblSize];
 	// idxTbl_[x] = (x << (pBitSize_ - 1)) / p
 	uint8_t idxTbl_[pMulTblN * 2];
 	// return x >> (pBitSize_ - 1)
 	SmallModp()
 		: N_(0)
 		, shiftL_(0)
 		, shiftR_(0)
 		, maxIdx_(0)
 		, pMulTbl_()
 		, idxTbl_()
 	{
 	}
 	// return argmax { i : x > i * p }
 	uint32_t approxMul(const Unit *x) const
 	{
 		uint32_t top = getTop(x);
 		assert(top <= maxIdx_);
 		return idxTbl_[top];
 	}
 	const Unit *getPmul(size_t v) const
 	{
 		assert(v < pMulTblN);
 		return pMulTbl_[v];
 	}
 	uint32_t getTop(const Unit *x) const
 	{
 		return (x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_);
 	}
 	uint32_t cvtInt(const mpz_class& x) const
 	{
 		assert(mcl::gmp::getUnitSize(x) <= 1);
 		if (x == 0) {
 			return 0;
 		} else {
 			return uint32_t(mcl::gmp::getUnit(x)[0]);
 		}
 	}
 	void init(const mpz_class& p)
 	{
 		size_t pBitSize = mcl::gmp::getBitSize(p);
 		N_ = (pBitSize + unitBitSize - 1) / unitBitSize;
 		shiftR_ = (pBitSize - 1) % unitBitSize;
 		shiftL_ = unitBitSize - shiftR_;
 		mpz_class t = 0;
 		for (size_t i = 0; i < pMulTblN; i++) {
 			bool b;
 			mcl::gmp::getArray(&b, pMulTbl_[i], maxTblSize, t);
 			assert(b);
 			(void)b;
 			if (i == pMulTblN - 1) {
 				maxIdx_ = getTop(pMulTbl_[i]);
 				assert(maxIdx_ < CYBOZU_NUM_OF_ARRAY(idxTbl_));
 				break;
 			}
 			t += p;
 		}
 		for (uint32_t i = 0; i <= maxIdx_; i++) {
 			idxTbl_[i] = cvtInt((mpz_class(int(i)) << (pBitSize - 1)) / p);
 		}
 	}
 };
 /*
 	Barrett Reduction
 	for non GMP version
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@ -191,6 +191,7 @@ struct Op {
 	uint32_t pmod4;
 	mcl::SquareRoot sq;
 	mcl::Modp modp;
 	mcl::SmallModp smallModp;
 	Unit half[maxUnitSize]; // (p + 1) / 2
 	Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery)
 	/*
@ -215,6 +216,7 @@ struct Op {
 	void3u fp_mulA_;
 	void2u fp_sqrA_;
 	void2u fp_mul2A_;
 	void2u fp_mul9A_;
 	void3u fp2_addA_;
 	void3u fp2_subA_;
 	void2u fp2_negA_;
@ -304,6 +306,7 @@ struct Op {
 		fp_mulA_ = 0;
 		fp_sqrA_ = 0;
 		fp_mul2A_ = 0;
 		fp_mul9A_ = 0;
 		fp2_addA_ = 0;
 		fp2_subA_ = 0;
 		fp2_negA_ = 0;
--- a/misc/snark-p.py
+++ b/misc/snark-p.py
@ -0,0 +1,13 @@
 # Prints two small tables derived from the fixed modulus p and a 253-bit shift.
 # NOTE(review): judging by the file name, p is the prime of a SNARK-related curve — confirm.
 p=21888242871839275222246405745257275088696311157297823662689037894645226208583
 print("over 253 bit")
 # For i = 0..9: the multiple p * i shifted right by 253 bits.
 for i in range (10):
 	print(i, (p * i) >> 253)
 # Integer quotient of x by p.
 def maxarg(x):
 	return x // p
 print("maxarg")
 # For i = 0..15: the quotient of (i << 253) by p.
 for i in range(16):
 	print(i, maxarg(i << 253))
--- a/readme.md
+++ b/readme.md
@ -1,4 +1,4 @@
-[![Build Status](https://api.travis-ci.com/herumi/mcl.svg?branch=master)](https://travis-ci.com/github/herumi/mcl)
+[![Build Status](https://github.com/herumi/mcl/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/mcl/actions/workflows/main.yml)
 # mcl
--- a/src/fp.cpp
+++ b/src/fp.cpp
@ -639,6 +639,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
 		if (!b) return false;
 	}
 	modp.init(mp);
 	smallModp.init(mp);
 	return fp::initForMont(*this, p, mode);
 }
--- a/test/bench.hpp
+++ b/test/bench.hpp
@ -116,6 +116,10 @@ void testBench(const G1& P, const G2& Q)
 	CYBOZU_BENCH_C("Fp::sub       ", C3, Fp::sub, x, x, y);
 	CYBOZU_BENCH_C("Fp::add 2     ", C3, Fp::add, x, x, x);
 	CYBOZU_BENCH_C("Fp::mul2      ", C3, Fp::mul2, x, x);
 	CYBOZU_BENCH_C("Fp::mulSmall8 ", C3, Fp::mulSmall, x, x, 8);
 	CYBOZU_BENCH_C("Fp::mulUnit8  ", C3, Fp::mulUnit, x, x, 8);
 	CYBOZU_BENCH_C("Fp::mul9      ", C3, Fp::mul9, x, x);
 	CYBOZU_BENCH_C("Fp::mulUnit9  ", C3, Fp::mulUnit, x, x, 9);
 	CYBOZU_BENCH_C("Fp::neg       ", C3, Fp::neg, x, x);
 	CYBOZU_BENCH_C("Fp::mul       ", C3, Fp::mul, x, x, y);
 	CYBOZU_BENCH_C("Fp::sqr       ", C3, Fp::sqr, x, x);
--- a/test/common_test.hpp
+++ b/test/common_test.hpp
@ -183,8 +183,24 @@ void testFp2Dbl_mul_xi1()
 	}
 }
 void testMulSmall()
 {
 	puts("testMulSmall");
 	cybozu::XorShift rg;
 	for (int y = 0; y < 10; y++) {
 		for (int i = 0; i < 40; i++) {
 			Fp x, z1, z2;
 			x.setByCSPRNG(rg);
 			Fp::mulSmall(z1, x, y);
 			z2 = x * y;
 			CYBOZU_TEST_EQUAL(z1, z2);
 		}
 	}
 }
 void testCommon(const G1& P, const G2& Q)
 {
 	testMulSmall();
 	testFp2Dbl_mul_xi1();
 	testABCD();
 	testMul2();
`@ -1,4 +1,4 @@`
	`[![Build Status](https://api.travis-ci.com/herumi/mcl.svg?branch=master)](https://travis-ci.com/github/herumi/mcl)`	`[![Build Status](https://github.com/herumi/mcl/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/mcl/actions/workflows/main.yml)`

	`# mcl`	`# mcl`