diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp
index 4a6ec6c..4de8e9e 100644
--- a/include/mcl/fp.hpp
+++ b/include/mcl/fp.hpp
@@ -81,6 +81,8 @@ public:
 		op_.fp_sqr = fp_sqrW;
 		op_.fp_add = fp_addW;
 		op_.fp_sub = fp_subW;
+		op_.fpDbl_add = fpDbl_addW; // double-width add wrapper defined below
+		op_.fpDbl_sub = fpDbl_subW; // double-width sub wrapper defined below
 		op_.fp_mul = fp_mulW;
 		/*
 			priority : MCL_USE_XBYAK > MCL_USE_LLVM > none
@@ -441,6 +443,14 @@ private:
 	{
 		op_.fp_subP(z, x, y, op_.p);
 	}
+	static inline void fpDbl_addW(Unit *z, const Unit *x, const Unit *y)
+	{ // forwards to op_.fpDbl_addP, supplying op_.p as the modulus argument
+		op_.fpDbl_addP(z, x, y, op_.p);
+	}
+	static inline void fpDbl_subW(Unit *z, const Unit *x, const Unit *y)
+	{ // forwards to op_.fpDbl_subP, supplying op_.p as the modulus argument
+		op_.fpDbl_subP(z, x, y, op_.p);
+	}
 	static inline void fp_mulW(Unit *z, const Unit *x, const Unit *y)
 	{
 		Unit xy[maxSize * 2];
diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp
index 73ad4b6..995f1c9 100644
--- a/include/mcl/fp_tower.hpp
+++ b/include/mcl/fp_tower.hpp
@@ -13,6 +13,12 @@ namespace mcl {
 template
 struct BnT {
 	typedef fp::Unit Unit;
+	class FpDbl { // value stored in Fp::maxSize * 2 units
+		Unit v_[Fp::maxSize * 2];
+	public:
+		static inline void add(FpDbl& z, const FpDbl& x, const FpDbl& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_); }
+		static inline void sub(FpDbl& z, const FpDbl& x, const FpDbl& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_); }
+	};
 	/*
 		beta = -1
 		Fp2 = F[u] / (u^2 + 1)
diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp
index 17550e9..a466455 100644
--- a/include/mcl/op.hpp
+++ b/include/mcl/op.hpp
@@ -102,6 +102,16 @@ struct Op {
 	void3u fp_modP;
 	FpGenerator *fg;
 
+	/*
+		for FpDbl (operands are arrays of N * 2 units)
+	*/
+	void3u fpDbl_add;
+	void3u fpDbl_sub;
+	void4u fpDbl_addP;
+	void4u fpDbl_subP;
+	void3u fpDbl_addNC;
+	void3u fpDbl_subNC;
+
 	/*
 		for Fp2 = F[u] / (u^2 + 1)
 		x = a + bu
@@ -124,6 +134,9 @@ struct Op {
 		, rp(0), mont(0)
 		, fp_negP(0), fp_sqrPreP(0), fp_invOp(0), fp_addP(0), fp_subP(0), fp_mulPreP(0), fp_modP(0)
 		, fg(createFpGenerator())
+		, fpDbl_add(0), fpDbl_sub(0)
+		, fpDbl_addP(0), fpDbl_subP(0)
+		, fpDbl_addNC(0), fpDbl_subNC(0)
 		, xi_c(0)
 		, fp2_add(0), fp2_sub(0), fp2_mul(0), fp2_neg(0)
 		, fp2_sqr(0), fp2_mul_xi(0)
diff --git a/src/fp.cpp b/src/fp.cpp
index ff781e4..66ef781 100644
--- a/src/fp.cpp
+++ b/src/fp.cpp
@@ -93,7 +93,7 @@ struct OpeFunc {
 	{
 		copyArray(y, x, N);
 	}
-	static inline void fp_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+	static inline void fp_addPC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
 	{
 		Unit ret[N + 2]; // not N + 1
 		mpz_t mz, mx, my, mp;
@@ -107,7 +107,7 @@ struct OpeFunc {
 		}
 		Gmp::getArray(z, N, mz);
 	}
-	static inline void fp_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+	static inline void fp_subPC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
 	{
 		Unit ret[N + 1];
 		mpz_t mz, mx, my;
@@ -122,6 +122,44 @@ struct OpeFunc {
 		}
 		Gmp::getArray(z, N, mz);
 	}
+	/*
+		z[N * 2] <- (x[N * 2] + y[N * 2]) mod pDbl, where pDbl = p[N] << (N * UnitBitSize)
+	*/
+	static inline void fpDbl_addPC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+	{
+		Unit ret[N * 2 + 2]; // not N * 2 + 1
+		Unit pDbl[N * 2]; // p moved up by N units
+		mpz_t mz, mx, my, mp;
+		set_zero(mz, ret, N * 2 + 2);
+		set_mpz_t(mx, x, N * 2);
+		set_mpz_t(my, y, N * 2);
+		memset(pDbl, 0, N * sizeof(Unit)); // low N units are zero
+		memcpy(pDbl + N, p, N * sizeof(Unit)); // high N units hold p
+		set_mpz_t(mp, pDbl, N * 2); // must be the 2N-unit pDbl; only N units of p are copied above
+		mpz_add(mz, mx, my);
+		if (mpz_cmp(mz, mp) >= 0) { // one conditional subtraction, not a full reduction
+			mpz_sub(mz, mz, mp);
+		}
+		Gmp::getArray(z, N * 2, mz);
+	}
+	static inline void fpDbl_subPC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
+	{ // z[N * 2] <- x[N * 2] - y[N * 2], plus pDbl = p[N] << (N * UnitBitSize) when the difference is negative
+		Unit ret[N * 2 + 1];
+		Unit pDbl[N * 2]; // filled in only on the negative path
+		mpz_t mz, mx, my;
+		set_zero(mz, ret, N * 2 + 1);
+		set_mpz_t(mx, x, N * 2);
+		set_mpz_t(my, y, N * 2);
+		mpz_sub(mz, mx, my);
+		if (mpz_sgn(mz) < 0) {
+			mpz_t mp;
+			memset(pDbl, 0, N * sizeof(Unit)); // low N units are zero
+			memcpy(pDbl + N, p, N * sizeof(Unit)); // high N units hold p
+			set_mpz_t(mp, pDbl, N * 2); // must be the 2N-unit pDbl; only N units of p are copied above
+			mpz_add(mz, mz, mp);
+		}
+		Gmp::getArray(z, N * 2, mz);
+	}
 	// z = x + y without carry
 	static inline void fp_addNCC(Unit *z, const Unit *x, const Unit *y)
 	{
@@ -200,7 +238,7 @@ struct OpeFunc {
 			if (x != y) fp_clearC(y);
 			return;
 		}
-		fp_subC(y, p, x, p);
+		fp_subPC(y, p, x, p);
 	}
 };
 
@@ -234,8 +272,8 @@ struct OpeFunc {
 		} else { \
 			fp_invOp = OpeFunc::fp_invOpC; \
 		} \
-		fp_addP = OpeFunc::fp_addC; \
-		fp_subP = OpeFunc::fp_subC; \
+		fp_addP = OpeFunc::fp_addPC; \
+		fp_subP = OpeFunc::fp_subPC; \
 		if (fullBit) { \
 			fp_addNC = fp_add; \
 			fp_subNC = fp_sub; \