fp_sub without jmp

update-fork
MITSUNARI Shigeo 4 years ago
parent aa6b6d0e15
commit 1fbb857884
  1. 54
      src/fp_generator.hpp

@ -755,6 +755,21 @@ private:
}
return 0;
}
void gen_raw_fp_sub_2(const RegExp& pz, const RegExp& px, const RegExp& py, const Pack& t, bool withCarry)
{
Pack t1 = t.sub(0, pn_);
Pack t2 = t.sub(pn_, pn_);
load_rm(t1, px);
sub_rm(t1, py, withCarry);
push(t1[0]);
lea(t1[0], ptr[rip + pL_]);
load_rm(t2, t1[0]);
sbb(t1[0], t1[0]);
and_pr(t2, t1[0]);
pop(t1[0]);
add_rr(t1, t2);
store_mr(pz, t1);
}
void gen_raw_fp_sub6(const RegExp& pz, const RegExp& px, const RegExp& py, int offset, const Pack& t, bool withCarry)
{
load_rm(t, px + offset);
@ -767,39 +782,22 @@ private:
L("@@");
store_mr(pz + offset, t);
}
void gen_fp_sub6()
{
StackFrame sf(this, 3, 4);
const Reg64& pz = sf.p[0];
const Reg64& px = sf.p[1];
const Reg64& py = sf.p[2];
Pack t = sf.t;
t.append(rax);
t.append(px); // |t| = 6
gen_raw_fp_sub6(pz, px, py, 0, t, false);
}
void3u gen_fp_sub()
{
if (pn_ > 6) return 0;
void3u func = getCurr<void3u>();
if (pn_ <= 4) {
gen_fp_sub_le4();
return func;
}
if (pn_ == 6) {
gen_fp_sub6();
return func;
}
StackFrame sf(this, 3);
/*
micro-benchmark of jmp is faster than and-mask
but it's slower for pairings
*/
int n = pn_ * 2 - 1;
StackFrame sf(this, 3, n);
const Reg64& pz = sf.p[0];
const Reg64& px = sf.p[1];
const Reg64& py = sf.p[2];
const Xbyak::CodeGenerator::LabelType jmpMode = pn_ < 5 ? T_AUTO : T_NEAR;
Label exit;
gen_raw_sub(pz, px, py, rax, pn_);
jnc(exit, jmpMode);
lea(px, ptr[rip+pL_]);
gen_raw_add(pz, pz, px, rax, pn_);
L(exit);
Pack t = sf.t;
t.append(rax);
gen_raw_fp_sub_2(pz, px, py, t, false);
return func;
}
void2u gen_fp_neg()
@ -3513,8 +3511,6 @@ private:
// almost same for pn_ == 6
if (pn_ != 4) return 0;
void2u func = getCurr<void2u>();
// almost same for pn_ == 6
if (pn_ != 4) return 0;
const RegExp y = rsp + 0 * 8;
const RegExp x = rsp + 1 * 8;
const Ext1 t1(FpByte_, rsp, 2 * 8);

Loading…
Cancel
Save