update-fork
MITSUNARI Shigeo 4 years ago
parent 28c8a0b489
commit bcf5961f70
  1. 6
      include/mcl/fp.hpp
  2. 4
      include/mcl/op.hpp
  3. 24
      src/fp.cpp
  4. 2
      test/bench.hpp
  5. 14
      test/fp_test.cpp

@ -163,6 +163,8 @@ public:
if (mul == 0) mul = mulC; if (mul == 0) mul = mulC;
sqr = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_sqrA_); sqr = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_sqrA_);
if (sqr == 0) sqr = sqrC; if (sqr == 0) sqr = sqrC;
mul2 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul2A_);
if (mul2 == 0) mul2 = mul2C;
#endif #endif
*pb = true; *pb = true;
} }
@ -495,12 +497,15 @@ public:
static inline void mulC(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); } static inline void mulC(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
static void (*sqr)(FpT& y, const FpT& x); static void (*sqr)(FpT& y, const FpT& x);
static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); } static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
static void (*mul2)(FpT& y, const FpT& x);
static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
#else #else
static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); } static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); } static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); }
static inline void neg(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_, op_.p); } static inline void neg(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_, op_.p); }
static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); } static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); } static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
#endif #endif
static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); } static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); } static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
@ -740,6 +745,7 @@ template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sub)(FpT& z,
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::neg)(FpT& y, const FpT& x); template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::neg)(FpT& y, const FpT& x);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul)(FpT& z, const FpT& x, const FpT& y); template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul)(FpT& z, const FpT& x, const FpT& y);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y, const FpT& x); template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y, const FpT& x);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul2)(FpT& y, const FpT& x);
#endif #endif
} // mcl } // mcl

@ -214,6 +214,7 @@ struct Op {
void2u fp_negA_; void2u fp_negA_;
void3u fp_mulA_; void3u fp_mulA_;
void2u fp_sqrA_; void2u fp_sqrA_;
void2u fp_mul2A_;
void3u fp2_addA_; void3u fp2_addA_;
void3u fp2_subA_; void3u fp2_subA_;
void2u fp2_negA_; void2u fp2_negA_;
@ -231,6 +232,7 @@ struct Op {
void1u fp_clear; void1u fp_clear;
void2u fp_copy; void2u fp_copy;
void2u fp_shr1; void2u fp_shr1;
void3u fp_mul2;
void3u fp_neg; void3u fp_neg;
void4u fp_add; void4u fp_add;
void4u fp_sub; void4u fp_sub;
@ -299,6 +301,7 @@ struct Op {
fp_negA_ = 0; fp_negA_ = 0;
fp_mulA_ = 0; fp_mulA_ = 0;
fp_sqrA_ = 0; fp_sqrA_ = 0;
fp_mul2A_ = 0;
fp2_addA_ = 0; fp2_addA_ = 0;
fp2_subA_ = 0; fp2_subA_ = 0;
fp2_negA_ = 0; fp2_negA_ = 0;
@ -316,6 +319,7 @@ struct Op {
fp_clear = 0; fp_clear = 0;
fp_copy = 0; fp_copy = 0;
fp_shr1 = 0; fp_shr1 = 0;
fp_mul2 = 0;
fp_neg = 0; fp_neg = 0;
fp_add = 0; fp_add = 0;
fp_sub = 0; fp_sub = 0;

@ -261,6 +261,28 @@ struct SetFpDbl<N, true> {
} }
}; };
/*
	y = x doubled, with p subtracted once when the doubled value is >= p
	assume !isFullBit
	NOTE(review): the top bit of x[N - 1] is dropped by the shift, and only one
	subtraction is performed; presumably callers guarantee x < p - confirm.
	y may be the same buffer as x (each limb is read before it is overwritten).
*/
template<size_t N>
void Mul2(Unit *y, const Unit *x, const Unit *p)
{
	const size_t unitBitSize = sizeof(Unit) * 8;
	const size_t carryShift = unitBitSize - 1;
	// shift the N-limb value left by one bit, walking from the top limb down
	Unit upper = x[N - 1];
	for (size_t pos = N - 1; pos > 0; pos--) {
		const Unit lower = x[pos - 1];
		// the top bit of the lower limb becomes the bottom bit of this limb
		y[pos] = (upper << 1) | (lower >> carryShift);
		upper = lower;
	}
	y[0] = upper << 1;
	// compare y with p starting at the most significant limb
	for (size_t pos = N; pos-- > 0; ) {
		if (y[pos] == p[pos]) continue;
		if (y[pos] < p[pos]) return; // y < p : nothing to subtract
		break; // y > p
	}
	// reached when y >= p (including y == p)
	SubPre<N, Gtag>::f(y, y, p);
}
template<size_t N, class Tag, bool enableFpDbl, bool gmpIsFasterThanLLVM> template<size_t N, class Tag, bool enableFpDbl, bool gmpIsFasterThanLLVM>
void setOp2(Op& op) void setOp2(Op& op)
{ {
@ -269,9 +291,11 @@ void setOp2(Op& op)
if (op.isFullBit) { if (op.isFullBit) {
op.fp_add = Add<N, true, Tag>::f; op.fp_add = Add<N, true, Tag>::f;
op.fp_sub = Sub<N, true, Tag>::f; op.fp_sub = Sub<N, true, Tag>::f;
op.fp_mul2 = 0; // not supported
} else { } else {
op.fp_add = Add<N, false, Tag>::f; op.fp_add = Add<N, false, Tag>::f;
op.fp_sub = Sub<N, false, Tag>::f; op.fp_sub = Sub<N, false, Tag>::f;
op.fp_mul2 = Mul2<N>;
} }
if (op.isMont) { if (op.isMont) {
if (op.isFullBit) { if (op.isFullBit) {

@ -102,6 +102,8 @@ void testBench(const G1& P, const G2& Q)
#endif #endif
CYBOZU_BENCH_C("Fp::add ", C3, Fp::add, x, x, y); CYBOZU_BENCH_C("Fp::add ", C3, Fp::add, x, x, y);
CYBOZU_BENCH_C("Fp::sub ", C3, Fp::sub, x, x, y); CYBOZU_BENCH_C("Fp::sub ", C3, Fp::sub, x, x, y);
CYBOZU_BENCH_C("Fp::add 2 ", C3, Fp::add, x, x, x);
CYBOZU_BENCH_C("Fp::mul2 ", C3, Fp::mul2, x, x);
CYBOZU_BENCH_C("Fp::neg ", C3, Fp::neg, x, x); CYBOZU_BENCH_C("Fp::neg ", C3, Fp::neg, x, x);
CYBOZU_BENCH_C("Fp::mul ", C3, Fp::mul, x, x, y); CYBOZU_BENCH_C("Fp::mul ", C3, Fp::mul, x, x, y);
CYBOZU_BENCH_C("Fp::sqr ", C3, Fp::sqr, x, x); CYBOZU_BENCH_C("Fp::sqr ", C3, Fp::sqr, x, x);

@ -919,6 +919,19 @@ CYBOZU_TEST_AUTO(mod_NIST_P521)
} }
#endif #endif
/*
	Apply Fp::mul2 in place 100 times and compare every intermediate value
	with the same doubling computed on mpz_class modulo Fp::getOp().mp.
*/
void mul2Test()
{
	// fp_mul2 is set to 0 (not supported) in setOp2 when isFullBit, so skip
	if (Fp::getOp().isFullBit) {
		return;
	}
	const int seed = 1234567;
	const size_t rounds = 100;
	Fp x = seed;
	mpz_class mx = seed;
	size_t done = 0;
	while (done < rounds) {
		Fp::mul2(x, x);
		// reference value: double, then reduce by the modulus
		mx *= 2;
		mx %= Fp::getOp().mp;
		CYBOZU_TEST_EQUAL(mx, x.getMpz());
		done++;
	}
}
void sub(mcl::fp::Mode mode) void sub(mcl::fp::Mode mode)
{ {
printf("mode=%s\n", mcl::fp::ModeToStr(mode)); printf("mode=%s\n", mcl::fp::ModeToStr(mode));
@ -962,6 +975,7 @@ void sub(mcl::fp::Mode mode)
const char *pStr = tbl[i]; const char *pStr = tbl[i];
printf("prime=%s\n", pStr); printf("prime=%s\n", pStr);
Fp::init(pStr, mode); Fp::init(pStr, mode);
mul2Test();
cstrTest(); cstrTest();
setStrTest(); setStrTest();
streamTest(); streamTest();

Loading…
Cancel
Save