diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index a17a10b..3ff0e1a 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -135,6 +135,14 @@ public: if (mulSmallUnit(z, x, y)) return; assert(0); // not supported y } + /* sub_p_if_possible(y, x): hands &x.v_[N] (N = Fp::op_.N) and a zero-filled array of Fp::maxSize Units to Fp::op_.fp_add together with Fp::op_.p, writing the result at &y.v_[N]. Presumably fp_add reduces its sum modulo p, so the effect is to subtract p from that part of x when it is not below p - TODO confirm against fp_add. NOTE(review): y.v_[0..N-1] is never written here, so y holds a complete value only when y and x are the same object. */ static void sub_p_if_possible(FpDblT& y, const FpDblT& x) + { + const size_t N = Fp::op_.N; + const Unit *xv = &x.v_[N]; + Unit *yv = &y.v_[N]; + static const Unit zero[Fp::maxSize] = {}; + Fp::op_.fp_add(yv, xv, zero, Fp::op_.p); + } static void init() { const mcl::fp::Op& op = Fp::getOp(); @@ -684,6 +692,11 @@ struct Fp2DblT { #endif void operator+=(const Fp2DblT& x) { add(*this, *this, x); } void operator-=(const Fp2DblT& x) { sub(*this, *this, x); } + /* Runs FpDbl::sub_p_if_possible on the a members and on the b members of (y, x), each pair independently. */ static void sub_p_if_possible(Fp2DblT& y, const Fp2DblT& x) + { + FpDbl::sub_p_if_possible(y.a, x.a); + FpDbl::sub_p_if_possible(y.b, x.b); + } static void init() { assert(!Fp::getOp().isFullBit); @@ -986,6 +999,39 @@ struct Fp6DblT { const Fp2& d = y.a; const Fp2& e = y.b; const Fp2& f = y.c; +#if 1 + /* Variant selected by #if 1; the earlier body is kept under #else. It builds the operand sums with Fp2::addPre where the #else body uses Fp2::add, and accumulates straight into z.a, z.b, z.c. a, b, c are declared above this hunk (presumably x.a, x.b, x.c - verify). */ Fp2Dbl& ZA = z.a; + Fp2Dbl& ZB = z.b; + Fp2Dbl& ZC = z.c; + Fp2 t1, t2; + Fp2Dbl BE, CF, AD; + /* ZA = (b + c)(e + f), ZB = (a + b)(e + d), ZC = (a + c)(d + f); t1 and t2 hold the addPre sums for each product in turn */ Fp2::addPre(t1, b, c); + Fp2::addPre(t2, e, f); + Fp2Dbl::mulPre(ZA, t1, t2); + Fp2::addPre(t1, a, b); + Fp2::addPre(t2, e, d); + Fp2Dbl::mulPre(ZB, t1, t2); + Fp2::addPre(t1, a, c); + Fp2::addPre(t2, d, f); + Fp2Dbl::mulPre(ZC, t1, t2); + /* the three plain products: BE = b*e, CF = c*f, AD = a*d */ Fp2Dbl::mulPre(BE, b, e); + Fp2Dbl::mulPre(CF, c, f); + Fp2Dbl::mulPre(AD, a, d); + /* remove the plain products from each cross product: ZA loses BE then CF, ZB loses AD then BE, ZC loses AD then CF; the sub_p_if_possible calls after each pair are commented out */ Fp2Dbl::sub(ZA, ZA, BE); + Fp2Dbl::sub(ZA, ZA, CF); +// Fp2Dbl::sub_p_if_possible(ZA, ZA); + Fp2Dbl::sub(ZB, ZB, AD); + Fp2Dbl::sub(ZB, ZB, BE); +// Fp2Dbl::sub_p_if_possible(ZB, ZB); + Fp2Dbl::sub(ZC, ZC, AD); + Fp2Dbl::sub(ZC, ZC, CF); +// Fp2Dbl::sub_p_if_possible(ZC, ZC); + /* final combination: z.a = mul_xi(ZA) + AD, z.b = ZB + mul_xi(CF), z.c = ZC + BE; mul_xi is presumably multiplication by the tower constant xi - confirm */ Fp2Dbl::mul_xi(ZA, ZA); + Fp2Dbl::add(ZA, ZA, AD); + Fp2Dbl::mul_xi(CF, CF); + Fp2Dbl::add(ZB, ZB, CF); + Fp2Dbl::add(ZC, ZC, BE); +#else Fp2Dbl& za = z.a; Fp2Dbl& zb = z.b; Fp2Dbl& zc = z.c; @@ -994,7 +1040,7 @@ struct Fp6DblT { Fp2Dbl::mulPre(BE, b, e); Fp2Dbl::mulPre(zb, c, f); 
- Fp2 t1, t2, t3, t4; + /* t3 and t4 are dropped; the added lines below reuse t1 and t2 for every pair of operand sums */ Fp2 t1, t2; Fp2::add(t1, b, c); Fp2::add(t2, e, f); Fp2Dbl T1; @@ -1003,16 +1049,16 @@ struct Fp6DblT { Fp2Dbl::sub(T1, T1, zb); Fp2Dbl::mul_xi(T1, T1); - Fp2::add(t2, a, b); - Fp2::add(t3, e, d); + Fp2::add(t1, a, b); + Fp2::add(t2, e, d); Fp2Dbl T2; - Fp2Dbl::mulPre(T2, t2, t3); + Fp2Dbl::mulPre(T2, t1, t2); Fp2Dbl::sub(T2, T2, za); Fp2Dbl::sub(T2, T2, BE); - Fp2::add(t3, a, c); - Fp2::add(t4, d, f); - Fp2Dbl::mulPre(zc, t3, t4); + Fp2::add(t1, a, c); + Fp2::add(t2, d, f); + Fp2Dbl::mulPre(zc, t1, t2); Fp2Dbl::sub(zc, zc, za); Fp2Dbl::sub(zc, zc, zb); @@ -1020,6 +1066,7 @@ struct Fp6DblT { Fp2Dbl::mul_xi(zb, zb); Fp2Dbl::add(zb, zb, T2); Fp2Dbl::add(zc, zc, BE); +#endif //clk.end(); } static void mod(Fp6& y, const Fp6Dbl& x)