enable Fp2Dbl::sqrPre

update-fork
MITSUNARI Shigeo 4 years ago
parent fcc57b16fa
commit 5dc683fea3
  1. 13
      src/fp_generator.hpp
  2. 2
      src/fp_static_code.hpp

@@ -3506,9 +3506,7 @@ private:
void2u gen_fp2Dbl_sqrPre() void2u gen_fp2Dbl_sqrPre()
{ {
if (isFullBit_) return 0; if (isFullBit_) return 0;
// if (pn_ != 4 && !(pn_ == 6 && useMulx_ && useAdx_)) return 0; if (pn_ > 6) return 0;
// almost same for pn_ == 6
if (pn_ != 4) return 0;
void2u func = getCurr<void2u>(); void2u func = getCurr<void2u>();
const RegExp y = rsp + 0 * 8; const RegExp y = rsp + 0 * 8;
const RegExp x = rsp + 1 * 8; const RegExp x = rsp + 1 * 8;
@@ -3520,7 +3518,7 @@ private:
mov(ptr [x], gp1); mov(ptr [x], gp1);
Pack t = sf.t; Pack t = sf.t;
if (pn_ == 6) { if (pn_ == 6) {
t.append(rax); t.append(gp2);
t.append(rdx); t.append(rdx);
} }
const Pack a = t.sub(0, pn_); const Pack a = t.sub(0, pn_);
@@ -3544,12 +3542,7 @@ private:
mov(gp2, ptr [x]); mov(gp2, ptr [x]);
call(mulPreL); call(mulPreL);
mov(gp0, ptr [x]); mov(gp0, ptr [x]);
if (pn_ == 4) { gen_raw_fp_sub_2(t1, gp0, gp0 + FpByte_, t, false);
gen_raw_fp_sub(t1, gp0, gp0 + FpByte_, sf.t, false);
} else {
assert(pn_ == 6);
gen_raw_fp_sub6(t1, gp0, gp0, FpByte_, a, false);
}
mov(gp0, ptr [y]); mov(gp0, ptr [y]);
lea(gp1, ptr [t1]); lea(gp1, ptr [t1]);
lea(gp2, ptr [t2]); lea(gp2, ptr [t2]);

@@ -83,7 +83,7 @@ void setStaticCode(mcl::fp::Op& op)
op.fp2_mul2A_ = mclx_Fp2_mul2; op.fp2_mul2A_ = mclx_Fp2_mul2;
op.fp2_mul_xiA_ = mclx_Fp2_mul_xi; op.fp2_mul_xiA_ = mclx_Fp2_mul_xi;
op.fp2Dbl_mulPreA_ = mclx_Fp2Dbl_mulPre; op.fp2Dbl_mulPreA_ = mclx_Fp2Dbl_mulPre;
op.fp2Dbl_sqrPreA_ = 0;//mclx_Fp2Dbl_sqrPre; op.fp2Dbl_sqrPreA_ = mclx_Fp2Dbl_sqrPre;
op.fp2Dbl_mul_xiA_ = mclx_Fp2Dbl_mul_xi; op.fp2Dbl_mul_xiA_ = mclx_Fp2Dbl_mul_xi;
op.fp_preInv = mclx_Fp_preInv; op.fp_preInv = mclx_Fp_preInv;
} else { } else {

Loading…
Cancel
Save