dev
MITSUNARI Shigeo 6 years ago
parent 629496d1a1
commit d80d82fdb4
  1. 43
      src/fp_generator.hpp

@ -836,6 +836,7 @@ private:
gen_montMul4();
return func;
}
return 0;
if (pn_ == 6 && useMulx_ && useAdx_) {
// gen_montMul6(p_, rp_);
StackFrame sf(this, 3, 10 | UseRDX, (1 + 12) * 8);
@ -1183,6 +1184,27 @@ private:
gen_montSqr3();
return func;
}
// Case pn_ == 4 with useMulx_ set: emit the squaring routine for this size.
if (pn_ == 4 && useMulx_) {
#if 1
// sqr(y, x) = mul(y, x, x)
// Duplicate the 2nd integer argument into the 3rd argument register so the
// multiply routine receives x for both operands
// (Win64 passes args in rcx, rdx, r8; System V passes them in rdi, rsi, rdx).
#ifdef XBYAK64_WIN
mov(r8, rdx);
#else
mov(rdx, rsi);
#endif
// Jump (not call) to the address held in op_->fp_mulA_, so that routine's
// own return goes straight back to the caller of this stub.
jmp((const void*)op_->fp_mulA_);
#else // (sqrPre + mod) is slower than mul
// Disabled alternative: run sqrPre4 with rsp as its first argument, then call
// fpDbl_modL with gp0 = sf.p[0] and gp1 = rsp.
// NOTE(review): the 8 * 8 passed to StackFrame is presumably the stack
// buffer size for the double-width result - confirm against StackFrame.
StackFrame sf(this, 3, 10 | UseRDX, 8 * 8);
Pack t = sf.t;
// sf.p[2] is appended to the temporary pack handed to sqrPre4.
t.append(sf.p[2]);
sqrPre4(rsp, sf.p[1], t);
mov(gp0, sf.p[0]);
mov(gp1, rsp);
call(fpDbl_modL);
#endif
return func;
}
return 0;
if (pn_ == 6 && useMulx_ && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, (1 + 12) * 8);
mov(ptr[rsp + 12 * 8], gp0);
@ -1194,26 +1216,7 @@ private:
call(fpDbl_modL);
return func;
}
// Compiled-out variant for pn_ == 4 with useMulx_: sqrPre4 followed by a call
// to fpDbl_modL. Kept under #if 0 for the reason given on the next line.
#if 0 // (sqrPre + mod) is slower than mul
if (pn_ == 4 && useMulx_) {
StackFrame sf(this, 3, 10 | UseRDX, 8 * 8);
Pack t = sf.t;
t.append(sf.p[2]);
sqrPre4(rsp, sf.p[1], t);
mov(gp0, sf.p[0]);
mov(gp1, rsp);
call(fpDbl_modL);
return func;
}
#endif
// sqr(y, x) = mul(y, x, x)
// Copy the 2nd integer argument into the 3rd argument register
// (Win64: rdx -> r8; System V: rsi -> rdx), then jump to the address held
// in op_->fp_mulA_ so that both multiply operands are x.
#ifdef XBYAK64_WIN
mov(r8, rdx);
#else
mov(rdx, rsi);
#endif
jmp((const void*)op_->fp_mulA_);
return func;
return 0;
}
/*
input (pz[], px[], py[])

Loading…
Cancel
Save