mulPre6 + mont is a little faster

dev
MITSUNARI Shigeo 6 years ago
parent 8ac1d066e4
commit 98a32e5c16
  1. src/fp_generator.hpp (35 changed lines)
  2. test/bls12_test.cpp (4 changed lines)

@@ -315,11 +315,11 @@ private:
func = gen_fpDbl_sqrPre(op); func = gen_fpDbl_sqrPre(op);
if (func) op.fpDbl_sqrPreA_ = reinterpret_cast<void2u>(func); if (func) op.fpDbl_sqrPreA_ = reinterpret_cast<void2u>(func);
if (op.N > 4) return; func = gen_mul();
align(16); if (func) {
op.fp_mul = getCurr<void4u>(); // used in toMont/fromMont op.fp_mul = reinterpret_cast<void4u>(func); // used in toMont/fromMont
op.fp_mulA_ = getCurr<void3u>(); op.fp_mulA_ = reinterpret_cast<void3u>(func);
gen_mul(); }
if (op.N > 4) return; if (op.N > 4) return;
align(16); align(16);
op.fp_sqrA_ = getCurr<void2u>(); op.fp_sqrA_ = getCurr<void2u>();
@@ -817,19 +817,25 @@ private:
mov(ptr [pz + (pn_ - 1) * 8], *t0); mov(ptr [pz + (pn_ - 1) * 8], *t0);
return func; return func;
} }
void gen_mul() const void* gen_mul()
{ {
align(16);
const void* func = getCurr<void*>();
if (op_->primeMode == PM_NIST_P192) { if (op_->primeMode == PM_NIST_P192) {
StackFrame sf(this, 3, 10 | UseRDX, 8 * 6); StackFrame sf(this, 3, 10 | UseRDX, 8 * 6);
mulPre3(rsp, sf.p[1], sf.p[2], sf.t); mulPre3(rsp, sf.p[1], sf.p[2], sf.t);
fpDbl_mod_NIST_P192(sf.p[0], rsp, sf.t); fpDbl_mod_NIST_P192(sf.p[0], rsp, sf.t);
return func;
} }
if (pn_ == 3) { if (pn_ == 3) {
gen_montMul3(); gen_montMul3();
} else if (pn_ == 4) { return func;
}
if (pn_ == 4) {
gen_montMul4(); gen_montMul4();
#if 1 return func;
} else if (pn_ == 6 && useAdx_) { }
if (pn_ == 6 && useAdx_) {
// gen_montMul6(p_, rp_); // gen_montMul6(p_, rp_);
StackFrame sf(this, 3, 10 | UseRDX, (1 + 12) * 8); StackFrame sf(this, 3, 10 | UseRDX, (1 + 12) * 8);
mov(ptr[rsp + 12 * 8], gp0); mov(ptr[rsp + 12 * 8], gp0);
@@ -838,12 +844,15 @@ private:
mov(gp0, ptr[rsp + 12 * 8]); mov(gp0, ptr[rsp + 12 * 8]);
mov(gp1, rsp); mov(gp1, rsp);
call(fpDbl_modL); call(fpDbl_modL);
#endif return func;
} else if (pn_ <= 9) { }
#if 0
if (pn_ <= 9) {
gen_montMulN(p_, rp_, pn_); gen_montMulN(p_, rp_, pn_);
} else { return func;
throw cybozu::Exception("mcl:FpGenerator:gen_mul:not implemented for") << pn_;
} }
#endif
return 0;
} }
/* /*
@input (z, xy) @input (z, xy)

@@ -696,9 +696,11 @@ if(0){
// exit(1); // exit(1);
} }
// CYBOZU_BENCH_C("subDbl", 10000000, FpDbl::sub, dx, dx, dx); // CYBOZU_BENCH_C("subDbl", 10000000, FpDbl::sub, dx, dx, dx);
CYBOZU_BENCH_C("mul", 10000000 / n, f, xv, yv, xv);
CYBOZU_BENCH_C("mulPre", 100000000, FpDbl::mulPre, dx, xv[0], yv[0]); CYBOZU_BENCH_C("mulPre", 100000000, FpDbl::mulPre, dx, xv[0], yv[0]);
CYBOZU_BENCH_C("sqrPre", 100000000, FpDbl::sqrPre, dx, xv[0]); CYBOZU_BENCH_C("sqrPre", 100000000, FpDbl::sqrPre, dx, xv[0]);
CYBOZU_BENCH_C("mod ", 100000000, FpDbl::mod, xv[0], dx);
CYBOZU_BENCH_C("mul ", 100000000, Fp::mul, xv[0], yv[0], xv[0]);
CYBOZU_BENCH_C("sqr ", 100000000, Fp::sqr, xv[0], xv[0]);
return 0; return 0;
#endif #endif
return cybozu::test::autoRun.run(argc, argv); return cybozu::test::autoRun.run(argc, argv);

Loading…
Cancel
Save