Merge branch 'dev'

update-fork
MITSUNARI Shigeo 4 years ago
commit 7bfe60c537
  1. 14
      .github/workflows/main.yml
  2. 17
      .travis.yml
  3. 2
      common.mk
  4. 14
      include/mcl/ec.hpp
  5. 16
      include/mcl/fp.hpp
  6. 79
      include/mcl/gmp_util.hpp
  7. 3
      include/mcl/op.hpp
  8. 13
      misc/snark-p.py
  9. 2
      readme.md
  10. 1
      src/fp.cpp
  11. 4
      test/bench.hpp
  12. 16
      test/common_test.hpp

@ -0,0 +1,14 @@
# CI workflow: runs the make-based test targets on every push.
name: test
on:
  - push
jobs:
  build:
    name: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      # Debug build and test run with whatever CXX the makefile selects by default.
      - run: make test_ci DEBUG=1 -j3
      - run: make clean
      # Same debug test run, this time forcing clang++ as the compiler.
      - run: make test_ci DEBUG=1 -j3 CXX=clang++
      - run: make clean
      - run: make test_go

@ -1,17 +0,0 @@
# Travis CI configuration: the script steps are listed once and the compiler
# list names both gcc and clang.
sudo: true
dist: trusty
language: cpp
compiler:
  - gcc
  - clang
addons:
  apt:
    packages:
      # Installed through apt before the script steps run.
      - libgmp-dev
script:
  - make test_ci DEBUG=1 -j3
  - make clean
  # Second pass with MCL_DONT_USE_XBYAK defined through CFLAGS_USER.
  - make test_ci CFLAGS_USER=-DMCL_DONT_USE_XBYAK -j3
  - make clean
  - make test_go

@ -91,7 +91,7 @@ else
CFLAGS_OPT+=-O3 CFLAGS_OPT+=-O3
else else
ifeq ($(shell expr $(GCC_VER) \> 4.6.0),1) ifeq ($(shell expr $(GCC_VER) \> 4.6.0),1)
CFLAGS_OPT+=-Ofast CFLAGS_OPT+=-O3
else else
CFLAGS_OPT+=-O3 CFLAGS_OPT+=-O3
endif endif

@ -301,7 +301,7 @@ void dblJacobi(E& R, const E& P)
xy += xy; // 4xy^2 xy += xy; // 4xy^2
switch (E::specialA_) { switch (E::specialA_) {
case Zero: case Zero:
F::add(t, x2, x2); F::mul2(t, x2);
x2 += t; x2 += t;
break; break;
case Minus3: case Minus3:
@ -312,7 +312,7 @@ void dblJacobi(E& R, const E& P)
F::sqr(t, t); F::sqr(t, t);
x2 -= t; x2 -= t;
} }
F::add(t, x2, x2); F::mul2(t, x2);
x2 += t; x2 += t;
break; break;
case GenericA: case GenericA:
@ -325,7 +325,7 @@ void dblJacobi(E& R, const E& P)
t *= E::a_; t *= E::a_;
} }
t += x2; t += x2;
x2 += x2; F::mul2(x2, x2);
x2 += t; x2 += t;
break; break;
} }
@ -337,12 +337,12 @@ void dblJacobi(E& R, const E& P)
} else { } else {
F::mul(R.z, P.y, P.z); F::mul(R.z, P.y, P.z);
} }
R.z += R.z; F::mul2(R.z, R.z);
F::sub(R.y, xy, R.x); F::sub(R.y, xy, R.x);
R.y *= x2; R.y *= x2;
y2 += y2; F::mul2(y2, y2);
y2 += y2; F::mul2(y2, y2);
y2 += y2; F::mul2(y2, y2);
R.y -= y2; R.y -= y2;
} }

@ -165,6 +165,8 @@ public:
if (sqr == 0) sqr = sqrC; if (sqr == 0) sqr = sqrC;
mul2 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul2A_); mul2 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul2A_);
if (mul2 == 0) mul2 = mul2C; if (mul2 == 0) mul2 = mul2C;
mul9 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul9A_);
if (mul9 == 0) mul9 = mul9C;
#endif #endif
*pb = true; *pb = true;
} }
@ -499,6 +501,8 @@ public:
static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); } static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
static void (*mul2)(FpT& y, const FpT& x); static void (*mul2)(FpT& y, const FpT& x);
static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); } static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
static void (*mul9)(FpT& y, const FpT& x);
static inline void mul9C(FpT& y, const FpT& x) { mulSmall(y, x, 9); }
#else #else
static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); } static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); } static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); }
@ -506,9 +510,20 @@ public:
static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); } static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); } static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); } static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
static inline void mul9(FpT& y, const FpT& x) { mulSmall(y, x, 9); }
#endif #endif
static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); } static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); } static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
/*
	z = x * y for a small multiplier y.
	y must not exceed SmallModp::maxMulN (asserted).
	The product is formed in a buffer one Unit wider than an element, an
	estimated multiple of p taken from op_.smallModp is subtracted, and a
	final fp_sub against p finishes the reduction.
*/
static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y)
{
	assert(y <= op_.smallModp.maxMulN);
	// presumably the unreduced product x * y; needs the extra top Unit
	Unit prod[maxSize + 1];
	op_.fp_mulUnitPre(prod, x.v_, y);
	// table-based estimate q of prod / p, then the precomputed multiple q * p
	const int q = op_.smallModp.approxMul(prod);
	const Unit *qTimesP = op_.smallModp.getPmul(q);
	op_.fp_subPre(z.v_, prod, qTimesP);
	// NOTE(review): looks like this relies on fp_sub(z, z, p, p) mapping a
	// remainder in [p, 2p) to z - p and leaving one in [0, p) unchanged - confirm.
	op_.fp_sub(z.v_, z.v_, op_.p, op_.p);
}
static inline void mulUnit(FpT& z, const FpT& x, const Unit y) static inline void mulUnit(FpT& z, const FpT& x, const Unit y)
{ {
if (mulSmallUnit(z, x, y)) return; if (mulSmallUnit(z, x, y)) return;
@ -746,6 +761,7 @@ template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::neg)(FpT& y,
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul)(FpT& z, const FpT& x, const FpT& y); template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul)(FpT& z, const FpT& x, const FpT& y);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y, const FpT& x); template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y, const FpT& x);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul2)(FpT& y, const FpT& x); template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul2)(FpT& y, const FpT& x);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul9)(FpT& y, const FpT& x);
#endif #endif
} // mcl } // mcl

@ -942,6 +942,85 @@ public:
#endif #endif
}; };
/*
x mod p for a small value x < (pMulTblN * p).
*/
struct SmallModp {
typedef mcl::fp::Unit Unit;
static const size_t unitBitSize = sizeof(Unit) * 8;
static const size_t maxTblSize = (MCL_MAX_BIT_SIZE + unitBitSize - 1) / unitBitSize + 1;
static const size_t maxMulN = 9;
static const size_t pMulTblN = maxMulN + 1;
int N_;
uint32_t shiftL_;
uint32_t shiftR_;
uint32_t maxIdx_;
// pMulTbl_[i] = (p * i) >> (pBitSize_ - 1)
Unit pMulTbl_[pMulTblN][maxTblSize];
// idxTbl_[x] = (x << (pBitSize_ - 1)) / p
uint8_t idxTbl_[pMulTblN * 2];
// return x >> (pBitSize_ - 1)
SmallModp()
: N_(0)
, shiftL_(0)
, shiftR_(0)
, maxIdx_(0)
, pMulTbl_()
, idxTbl_()
{
}
// return argmax { i : x > i * p }
uint32_t approxMul(const Unit *x) const
{
uint32_t top = getTop(x);
assert(top <= maxIdx_);
return idxTbl_[top];
}
const Unit *getPmul(size_t v) const
{
assert(v < pMulTblN);
return pMulTbl_[v];
}
uint32_t getTop(const Unit *x) const
{
return (x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_);
}
uint32_t cvtInt(const mpz_class& x) const
{
assert(mcl::gmp::getUnitSize(x) <= 1);
if (x == 0) {
return 0;
} else {
return uint32_t(mcl::gmp::getUnit(x)[0]);
}
}
void init(const mpz_class& p)
{
size_t pBitSize = mcl::gmp::getBitSize(p);
N_ = (pBitSize + unitBitSize - 1) / unitBitSize;
shiftR_ = (pBitSize - 1) % unitBitSize;
shiftL_ = unitBitSize - shiftR_;
mpz_class t = 0;
for (size_t i = 0; i < pMulTblN; i++) {
bool b;
mcl::gmp::getArray(&b, pMulTbl_[i], maxTblSize, t);
assert(b);
(void)b;
if (i == pMulTblN - 1) {
maxIdx_ = getTop(pMulTbl_[i]);
assert(maxIdx_ < CYBOZU_NUM_OF_ARRAY(idxTbl_));
break;
}
t += p;
}
for (uint32_t i = 0; i <= maxIdx_; i++) {
idxTbl_[i] = cvtInt((mpz_class(int(i)) << (pBitSize - 1)) / p);
}
}
};
/* /*
Barrett Reduction Barrett Reduction
for non GMP version for non GMP version

@ -191,6 +191,7 @@ struct Op {
uint32_t pmod4; uint32_t pmod4;
mcl::SquareRoot sq; mcl::SquareRoot sq;
mcl::Modp modp; mcl::Modp modp;
mcl::SmallModp smallModp;
Unit half[maxUnitSize]; // (p + 1) / 2 Unit half[maxUnitSize]; // (p + 1) / 2
Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery) Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery)
/* /*
@ -215,6 +216,7 @@ struct Op {
void3u fp_mulA_; void3u fp_mulA_;
void2u fp_sqrA_; void2u fp_sqrA_;
void2u fp_mul2A_; void2u fp_mul2A_;
void2u fp_mul9A_;
void3u fp2_addA_; void3u fp2_addA_;
void3u fp2_subA_; void3u fp2_subA_;
void2u fp2_negA_; void2u fp2_negA_;
@ -304,6 +306,7 @@ struct Op {
fp_mulA_ = 0; fp_mulA_ = 0;
fp_sqrA_ = 0; fp_sqrA_ = 0;
fp_mul2A_ = 0; fp_mul2A_ = 0;
fp_mul9A_ = 0;
fp2_addA_ = 0; fp2_addA_ = 0;
fp2_subA_ = 0; fp2_subA_ = 0;
fp2_negA_ = 0; fp2_negA_ = 0;

@ -0,0 +1,13 @@
# Prints reference tables for reducing small multiples of the prime p below:
# the top bits of i * p, and floor((i << SHIFT) / p) for small i.
p = 21888242871839275222246405745257275088696311157297823662689037894645226208583
# p has 254 bits, so SHIFT is (bit length of p) - 1.
SHIFT = 253

print("over 253 bit")
# Top bits of i * p for i = 0..9.
for i in range(10):
    print(i, (p * i) >> SHIFT)


def maxarg(x):
    """Return floor(x / p)."""
    return x // p


print("maxarg")
# Quotient estimate for each possible top-bits value i = 0..15.
for i in range(16):
    print(i, maxarg(i << SHIFT))

@ -1,4 +1,4 @@
[![Build Status](https://api.travis-ci.com/herumi/mcl.svg?branch=master)](https://travis-ci.com/github/herumi/mcl) [![Build Status](https://github.com/herumi/mcl/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/mcl/actions/workflows/main.yml)
# mcl # mcl

@ -639,6 +639,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
if (!b) return false; if (!b) return false;
} }
modp.init(mp); modp.init(mp);
smallModp.init(mp);
return fp::initForMont(*this, p, mode); return fp::initForMont(*this, p, mode);
} }

@ -116,6 +116,10 @@ void testBench(const G1& P, const G2& Q)
CYBOZU_BENCH_C("Fp::sub ", C3, Fp::sub, x, x, y); CYBOZU_BENCH_C("Fp::sub ", C3, Fp::sub, x, x, y);
CYBOZU_BENCH_C("Fp::add 2 ", C3, Fp::add, x, x, x); CYBOZU_BENCH_C("Fp::add 2 ", C3, Fp::add, x, x, x);
CYBOZU_BENCH_C("Fp::mul2 ", C3, Fp::mul2, x, x); CYBOZU_BENCH_C("Fp::mul2 ", C3, Fp::mul2, x, x);
CYBOZU_BENCH_C("Fp::mulSmall8 ", C3, Fp::mulSmall, x, x, 8);
CYBOZU_BENCH_C("Fp::mulUnit8 ", C3, Fp::mulUnit, x, x, 8);
CYBOZU_BENCH_C("Fp::mul9 ", C3, Fp::mul9, x, x);
CYBOZU_BENCH_C("Fp::mulUnit9 ", C3, Fp::mulUnit, x, x, 9);
CYBOZU_BENCH_C("Fp::neg ", C3, Fp::neg, x, x); CYBOZU_BENCH_C("Fp::neg ", C3, Fp::neg, x, x);
CYBOZU_BENCH_C("Fp::mul ", C3, Fp::mul, x, x, y); CYBOZU_BENCH_C("Fp::mul ", C3, Fp::mul, x, x, y);
CYBOZU_BENCH_C("Fp::sqr ", C3, Fp::sqr, x, x); CYBOZU_BENCH_C("Fp::sqr ", C3, Fp::sqr, x, x);

@ -183,8 +183,24 @@ void testFp2Dbl_mul_xi1()
} }
} }
/*
	Compare Fp::mulSmall(z, x, y) with the generic product x * y
	for every y in [0, 10) and 40 random x per y.
*/
void testMulSmall()
{
	puts("testMulSmall");
	cybozu::XorShift rg;
	const int multiplierEnd = 10;
	const int trialsPerMultiplier = 40;
	for (int y = 0; y != multiplierEnd; ++y) {
		for (int trial = 0; trial != trialsPerMultiplier; ++trial) {
			Fp x, z1, z2;
			x.setByCSPRNG(rg);
			// z1: small-multiplier path, z2: reference result
			Fp::mulSmall(z1, x, y);
			z2 = x * y;
			CYBOZU_TEST_EQUAL(z1, z2);
		}
	}
}
void testCommon(const G1& P, const G2& Q) void testCommon(const G1& P, const G2& Q)
{ {
testMulSmall();
testFp2Dbl_mul_xi1(); testFp2Dbl_mul_xi1();
testABCD(); testABCD();
testMul2(); testMul2();

Loading…
Cancel
Save