dev
MITSUNARI Shigeo 9 years ago
parent aa930b78c1
commit 4667fde466
  1. 14
      include/mcl/fp.hpp
  2. 16
      include/mcl/fp_generator.hpp
  3. 5
      sample/bench.cpp
  4. 15
      src/fp.cpp

@ -98,6 +98,7 @@ public:
op_.useMont = mode == fp::FP_LLVM_MONT || mode == fp::FP_XBYAK;
if (mode == fp::FP_LLVM_MONT) {
op_.mul = montW;
op_.sqr = montSqrW;
}
#if 0
fprintf(stderr, "mode=%d, useMont=%d"
@ -278,7 +279,7 @@ public:
// Thin static wrappers: each forwards the v_ members of its FpT arguments to the matching entry of op_.
// Forwards to op_.mul with z as destination and x, y as inputs.
static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.mul(z.v_, x.v_, y.v_); }
// Forwards to op_.invOp; op_ itself is passed as the third argument.
static inline void inv(FpT& y, const FpT& x) { op_.invOp(y.v_, x.v_, op_); }
// Forwards to op_.neg with y as destination and x as input.
static inline void neg(FpT& y, const FpT& x) { op_.neg(y.v_, x.v_); }
// NOTE(review): square is defined twice in a row here. The hunk header (-278,7 +279,7) says one line
// was replaced, so this first definition is presumably the pre-change version — confirm against the real file.
// It squares by calling op_.mul with x as both inputs.
static inline void square(FpT& y, const FpT& x) { op_.mul(y.v_, x.v_, x.v_); }
// Presumably the post-change version: squares through the dedicated op_.sqr entry instead of op_.mul.
static inline void square(FpT& y, const FpT& x) { op_.sqr(y.v_, x.v_); }
static inline void div(FpT& z, const FpT& x, const FpT& y)
{
FpT rev;
@ -433,10 +434,9 @@ public:
}
// Squaring on raw Unit arrays: reads x, writes the result to y.
// NOTE(review): this span looks like a diff rendered without +/- markers (hunk header -433,10 +434,9).
// The commented-out lines and the mulW call are presumably the removed body; the three live
// statements after them are presumably the added body — confirm against the real file.
static inline void sqrW(Unit *y, const Unit *x)
{
// Unit xx[maxSize * 2];
// op_.sqrPreP(xx, x);
// op_.modP(y, xx, op_.p);
mulW(y, x, x);
// Local scratch buffer of maxSize * 2 Units that receives the output of op_.sqrPreP.
Unit xx[maxSize * 2];
op_.sqrPreP(xx, x);
// Presumably reduces xx modulo op_.p into y (inferred from the name modP) — TODO confirm.
op_.modP(y, xx, op_.p);
}
static inline void negW(Unit *y, const Unit *x)
{
@ -447,6 +447,10 @@ public:
{
op_.mont(z, x, y, op_.p, op_.rp);
}
// Squaring through op_.mont: x is passed as both input operands, together with op_.p and op_.rp,
// and the result is written to y.
static inline void montSqrW(Unit *y, const Unit *x)
{
op_.mont(y, x, x, op_.p, op_.rp);
}
// Intentionally empty: the body performs no work.
void normalize() {} // dummy method
};

@ -219,9 +219,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
gen_mul();
align(16);
sqr_ = getCurr<void2op>();
if (!gen_sqr()) {
sqr_ = 0;
}
gen_sqr();
align(16);
shr1_ = getCurr<void2op>();
gen_shr1();
@ -547,13 +545,19 @@ struct FpGenerator : Xbyak::CodeGenerator {
throw cybozu::Exception("mcl:FpGenerator:gen_mul:not implemented for") << pn_;
}
}
// Emits the code for the squaring routine.
// NOTE(review): this span looks like a diff rendered without +/- markers (hunk header -547,13 +545,19).
// `bool gen_sqr()`, `return true;` and `return false;` are presumably the removed lines;
// `void gen_sqr()`, `return;` and the fallback after the if-block are presumably the added ones — confirm.
bool gen_sqr()
void gen_sqr()
{
// pn_ == 3 is handled by the dedicated gen_montSqr3 generator.
if (pn_ == 3) {
gen_montSqr3(p_, pp_);
return true;
return;
}
return false;
// sqr(y, x) = mul(y, x, x)
// Fallback for every other pn_: duplicate one register into another, then jump to mul_ (no call, so
// mul_'s own return goes back to the original caller).
// Presumably rdx -> r8 (Win64) and rsi -> rdx (System V) copy the 2nd argument into the 3rd
// argument register — TODO confirm against the calling conventions.
#ifdef XBYAK_WIN
mov(r8, rdx);
#else
mov(rdx, rsi);
#endif
jmp((void*)mul_);
}
/*
input (pz[], px[], py[])

@ -27,12 +27,13 @@ void benchFpSub(const char *pStr, const char *xStr, const char *yStr, mcl::fp::M
Fp x(xStr);
Fp y(yStr);
double addT, subT, mulT, invT;
double addT, subT, mulT, sqrT, invT;
CYBOZU_BENCH_T(addT, Fp::add, x, x, x);
CYBOZU_BENCH_T(subT, Fp::sub, x, x, y);
CYBOZU_BENCH_T(mulT, Fp::mul, x, x, x);
CYBOZU_BENCH_T(sqrT, Fp::square, x, x);
CYBOZU_BENCH_T(invT, x += y;Fp::inv, x, x); // avoid same jmp
printf("%10s bit % 3d add %8.2f sub %8.2f mul %8.2f inv %8.2f\n", s, (int)Fp::getBitSize(), addT, subT, mulT, invT);
printf("%10s bit % 3d add %8.2f sub %8.2f mul %8.2f sqr %8.2f inv %8.2f\n", s, (int)Fp::getBitSize(), addT, subT, mulT, sqrT, invT);
}
void benchFp(size_t bitSize, int mode)

@ -133,12 +133,11 @@ struct OpeFunc {
}
// Squares x without reduction; the N * 2 sizes used below indicate y holds N * 2 Units.
// NOTE(review): this span looks like a diff rendered without +/- markers (hunk header -133,12 +133,11).
// The commented-out lines and the mulPreC call are presumably the removed body; the GMP-based
// statements after them are presumably the added body — confirm against the real file.
static inline void sqrPreC(Unit *y, const Unit *x)
{
// mpz_t mx, my;
// set_zero(my, y, N * 2);
// set_mpz_t(mx, x);
// mpz_mul(my, mx, mx);
// Gmp::getArray(y, N * 2, my);
mulPreC(y, x, x);
mpz_t mx, my;
// mx / my are not passed to mpz_init here; presumably set_zero and set_mpz_t set them up over
// the y and x buffers — helpers are not visible in this view, TODO confirm.
set_zero(my, y, N * 2);
set_mpz_t(mx, x);
// GMP product of mx with itself, stored in my.
mpz_mul(my, mx, mx);
// Presumably copies my out to y as N * 2 Units — TODO confirm Gmp::getArray semantics.
Gmp::getArray(y, N * 2, my);
}
// x[N * 2] -> y[N]
static inline void modC(Unit *y, const Unit *x, const Unit *p)
@ -270,9 +269,7 @@ static void initForMont(Op& op, const Unit *p, Mode mode)
op.add = Xbyak::CastTo<void3u>(fg->add_);
op.sub = Xbyak::CastTo<void3u>(fg->sub_);
op.mul = Xbyak::CastTo<void3u>(fg->mul_);
// if (fg->sqr_) {
// op.sqr = Xbyak::CastTo<void2u>(fg->sqr_);
// }
op.sqr = Xbyak::CastTo<void2u>(fg->sqr_);
op.preInv = Xbyak::CastTo<int2u>(op.fg->preInv_);
op.invOp = &invOpForMont;

Loading…
Cancel
Save