remove mulPreC

update-fork
MITSUNARI Shigeo 4 years ago
parent ebbb5cf6cc
commit 52a9f4d213
  1. 9
      include/mcl/fp_tower.hpp
  2. 2
      include/mcl/op.hpp
  3. 28
      src/fp_generator.hpp
  4. 2
      src/fp_static_code.hpp

@ -113,24 +113,22 @@ public:
static void (*mod)(Fp& z, const FpDblT& xy);
static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
/*
	Plain C++ entry point for add.
	Forwards the v_ members of z, x, y to Fp::op_.fpDbl_add and passes
	Fp::op_.p as the final argument (presumably the modulus; confirm in Op).
*/
static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p);
}
/*
	Plain C++ entry point for sub.
	Forwards the v_ members of z, x, y to Fp::op_.fpDbl_sub and passes
	Fp::op_.p as the final argument (presumably the modulus; confirm in Op).
*/
static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p);
}
/*
	Plain C++ entry point for mod.
	Forwards z.v_ and xy.v_ to Fp::op_.fpDbl_mod and passes Fp::op_.p as the
	final argument (presumably the modulus; confirm in Op).
*/
static void modC(Fp& z, const FpDblT& xy)
{
	Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p);
}
/*
	Fallback used when the addPre function pointer would otherwise be null.
	Forwards the v_ members of z, x, y to Fp::op_.fpDbl_addPre; no modulus
	argument is passed.
*/
static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_);
}
/*
	Fallback used when the subPre function pointer would otherwise be null.
	Forwards the v_ members of z, x, y to Fp::op_.fpDbl_subPre; no modulus
	argument is passed.
*/
static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_);
}
static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
#else
/*
	Direct (non function-pointer) add.
	Forwards the v_ members of z, x, y to Fp::op_.fpDbl_add and passes
	Fp::op_.p as the final argument (presumably the modulus; confirm in Op).
*/
static void add(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p);
}
/*
	Direct (non function-pointer) sub.
	Forwards the v_ members of z, x, y to Fp::op_.fpDbl_sub and passes
	Fp::op_.p as the final argument (presumably the modulus; confirm in Op).
*/
static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p);
}
/*
	Direct (non function-pointer) mod.
	Forwards z.v_ and xy.v_ to Fp::op_.fpDbl_mod and passes Fp::op_.p as the
	final argument (presumably the modulus; confirm in Op).
*/
static void mod(Fp& z, const FpDblT& xy)
{
	Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p);
}
/*
	Direct (non function-pointer) addPre.
	Forwards the v_ members of z, x, y to Fp::op_.fpDbl_addPre; no modulus
	argument is passed.
*/
static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_);
}
/*
	Direct (non function-pointer) subPre.
	Forwards the v_ members of z, x, y to Fp::op_.fpDbl_subPre; no modulus
	argument is passed.
*/
static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_);
}
static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
#endif
/*
mul(z, x, y) is equivalent to mulPre(xy, x, y) followed by mod(z, xy)
*/
static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
static void (*sqrPre)(FpDblT& xx, const Fp& x);
static void mulUnit(FpDblT& z, const FpDblT& x, Unit y)
{
@ -151,9 +149,8 @@ public:
if (addPre == 0) addPre = addPreC;
subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre);
if (subPre == 0) subPre = subPreC;
mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_);
if (mulPre == 0) mulPre = mulPreC;
#endif
mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPre);
sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPre);
}
void operator+=(const FpDblT& x) { add(*this, *this, x); }
@ -166,8 +163,8 @@ template<class Fp> void (*FpDblT<Fp>::sub)(FpDblT&, const FpDblT&, const FpDblT&
template<class Fp> void (*FpDblT<Fp>::mod)(Fp&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::addPre)(FpDblT&, const FpDblT&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::subPre)(FpDblT&, const FpDblT&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&);
#endif
template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&);
template<class Fp> void (*FpDblT<Fp>::sqrPre)(FpDblT&, const Fp&);
template<class Fp> struct Fp12T;

@ -221,7 +221,6 @@ struct Op {
void2u fp2_sqrA_;
void3u fpDbl_addA_;
void3u fpDbl_subA_;
void3u fpDbl_mulPreA_;
void2u fpDbl_modA_;
void3u fp2Dbl_mulPreA_;
void2u fp2Dbl_sqrPreA_;
@ -307,7 +306,6 @@ struct Op {
fp2_sqrA_ = 0;
fpDbl_addA_ = 0;
fpDbl_subA_ = 0;
fpDbl_mulPreA_ = 0;
fpDbl_modA_ = 0;
fp2Dbl_mulPreA_ = 0;
fp2Dbl_sqrPreA_ = 0;

@ -421,11 +421,11 @@ private:
setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr());
align(16);
op.fpDbl_mulPreA_ = gen_fpDbl_mulPre();
setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr());
gen_fpDbl_mulPre(op.fpDbl_mulPre);
setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr());
align(16);
op.fpDbl_sqrPre = gen_fpDbl_sqrPre();
gen_fpDbl_sqrPre(op.fpDbl_sqrPre);
setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
align(16);
@ -2373,36 +2373,35 @@ private:
cmovc_rr(zp, keep);
store_mr(z, zp);
}
void2u gen_fpDbl_sqrPre()
void gen_fpDbl_sqrPre(void2u& f)
{
void2u func = getCurr<void2u>();
if (pn_ == 2 && useMulx_) {
StackFrame sf(this, 2, 7 | UseRDX);
sqrPre2(sf.p[0], sf.p[1], sf.t);
return func;
f = func;
}
if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t;
t.append(sf.p[2]);
sqrPre3(sf.p[0], sf.p[1], t);
return func;
f = func;
}
if (pn_ == 4 && useMulx_) {
StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t;
t.append(sf.p[2]);
sqrPre4(sf.p[0], sf.p[1], t);
return func;
f = func;
}
if (pn_ == 6 && useMulx_ && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
Pack t = sf.t;
t.append(sf.p[2]);
sqrPre6(sf.p[0], sf.p[1], t);
return func;
f = func;
}
return 0;
#if 0
#ifdef XBYAK64_WIN
mov(r8, rdx);
@ -2413,18 +2412,18 @@ private:
return func;
#endif
}
void3u gen_fpDbl_mulPre()
void gen_fpDbl_mulPre(void3u& f)
{
void3u func = getCurr<void3u>();
if (pn_ == 2 && useMulx_) {
StackFrame sf(this, 3, 5 | UseRDX);
mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t);
return func;
f = func;
}
if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX);
mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t);
return func;
f = func;
}
if (pn_ == 4) {
/*
@ -2437,7 +2436,7 @@ private:
L(mulPreL); // called only from asm code
mulPre4(gp0, gp1, gp2, sf.t);
ret();
return func;
f = func;
}
if (pn_ == 6 && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 0, false);
@ -2446,9 +2445,8 @@ private:
L(mulPreL); // called only from asm code
mulPre6(sf.t);
ret();
return func;
f = func;
}
return 0;
}
static inline void debug_put_inner(const uint64_t *ptr, int n)
{

@ -65,7 +65,7 @@ void setStaticCode(mcl::fp::Op& op)
op.fpDbl_subA_ = mclx_FpDbl_sub;
op.fpDbl_addPre = mclx_FpDbl_addPre;
op.fpDbl_subPre = mclx_FpDbl_subPre;
op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
op.fpDbl_mulPre = mclx_FpDbl_mulPre;
op.fpDbl_sqrPre = mclx_FpDbl_sqrPre;
op.fp2_addA_ = mclx_Fp2_add;
op.fp2_subA_ = mclx_Fp2_sub;

Loading…
Cancel
Save