From 1d2f4cb29b522ae93791943e421f8f0639dfaf4c Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 28 Aug 2019 05:03:25 +0900 Subject: [PATCH] optimize millerLoopVec --- include/mcl/bn.hpp | 98 ++++++++++++++++++++++++++++++++++++++++------ test/bench.hpp | 15 +++++++ 2 files changed, 102 insertions(+), 11 deletions(-) diff --git a/include/mcl/bn.hpp b/include/mcl/bn.hpp index 90a202b..15ebca8 100644 --- a/include/mcl/bn.hpp +++ b/include/mcl/bn.hpp @@ -1633,10 +1633,12 @@ inline void millerLoop(Fp12& f, const G1& P_, const G2& Q_) } } if (BN::param.z < 0) { - G2::neg(T, T); Fp6::neg(f.b, f.b); } if (BN::param.isBLS12) return; + if (BN::param.z < 0) { + G2::neg(T, T); + } Frobenius(Q, Q); addLine(d, T, Q, P); Frobenius(Q, Q); @@ -1900,24 +1902,98 @@ inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1, const G2& Q1, con } #endif +template +inline void millerLoopVecN(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n) +{ + assert(n <= N); + G1 P[N]; + G2 Q[N]; + // remove zero elements + { + size_t realN = 0; + for (size_t i = 0; i < n; i++) { + if (!Pvec[i].isZero() && !Qvec[i].isZero()) { + G1::normalize(P[realN], Pvec[i]); + G2::normalize(Q[realN], Qvec[i]); + realN++; + } + } + if (realN <= 0) { + f = 1; + return; + } + n = realN; // update n + } + // all P[] and Q[] are not zero + G2 T[N], negQ[N]; + G1 adjP[N]; + Fp6 d, e; + for (size_t i = 0; i < n; i++) { + T[i] = Q[i]; + if (BN::param.useNAF) { + G2::neg(negQ[i], Q[i]); + } + makeAdjP(adjP[i], P[i]); + dblLine(d, T[i], adjP[i]); + addLine(e, T[i], Q[i], P[i]); + if (i == 0) { + mulSparse2(f, d, e); + } else { + Fp12 ft; + mulSparse2(ft, d, e); + f *= ft; + } + } + for (size_t j = 2; j < BN::param.siTbl.size(); j++) { + Fp12::sqr(f, f); + for (size_t i = 0; i < n; i++) { + dblLine(e, T[i], adjP[i]); + mulSparse(f, e); + int v = BN::param.siTbl[j]; + if (v) { + if (v > 0) { + addLine(e, T[i], Q[i], P[i]); + } else { + addLine(e, T[i], negQ[i], P[i]); + } + mulSparse(f, e); + } + } + } + if (BN::param.z < 0) { + Fp6::neg(f.b, f.b); + } + if (BN::param.isBLS12) return; + for (size_t i = 0; i < n; i++) { + if (BN::param.z < 0) { + G2::neg(T[i], T[i]); + } + Frobenius(Q[i], Q[i]); + addLine(d, T[i], Q[i], P[i]); + Frobenius(Q[i], Q[i]); + G2::neg(Q[i], Q[i]); + addLine(e, T[i], Q[i], P[i]); + Fp12 ft; + mulSparse2(ft, d, e); + f *= ft; + } +} /* f = prod_{i=0}^{n-1} millerLoop(Pvec[i], Qvec[i]) */ inline void millerLoopVec(Fp12& f, const G1* Pvec, const G2* Qvec, size_t n) { - if (n == 0) { - f = 1; - return; - } - millerLoop(f, Pvec[0], Qvec[0]); - for (size_t i = 1; i < n; i++) { - Fp12 g; - millerLoop(g, Pvec[i], Qvec[i]); - f *= g; + const size_t N = 16; + size_t remain = fp::min_(N, n); + millerLoopVecN(f, Pvec, Qvec, remain); + for (size_t i = remain; i < n; i += N) { + remain = fp::min_(n - i, N); + Fp12 ft; + millerLoopVecN(ft, Pvec + i, Qvec + i, remain); + f *= ft; } } - inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calcG1(P, x); } inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calcG2(P, x); } #ifndef CYBOZU_DONT_USE_EXCEPTION diff --git a/test/bench.hpp b/test/bench.hpp index cc1639e..cf2a728 100644 --- a/test/bench.hpp +++ b/test/bench.hpp @@ -141,6 +141,21 @@ void testBench(const G1& P, const G2& Q) CYBOZU_BENCH_C("precomputeG2 ", C, precomputeG2, Qcoeff, Q); precomputeG2(Qcoeff, Q); CYBOZU_BENCH_C("precomputedML ", C, precomputedMillerLoop, e2, P, Qcoeff); + const size_t n = 7; + G1 Pvec[n]; + G2 Qvec[n]; + for (size_t i = 0; i < n; i++) { + char d = (char)(i + 1); + hashAndMapToG1(Pvec[i], &d, 1); + hashAndMapToG2(Qvec[i], &d, 1); + } + e2 = 1; + for (size_t i = 0; i < n; i++) { + millerLoop(e1, Pvec[i], Qvec[i]); + e2 *= e1; + } + CYBOZU_BENCH_C("millerLoopVec ", 3000, millerLoopVec, e1, Pvec, Qvec, n); + CYBOZU_TEST_EQUAL(e1, e2); } inline void SquareRootPrecomputeTest(const mpz_class& p)