remove mulPreC

update-fork
MITSUNARI Shigeo 4 years ago
parent ebbb5cf6cc
commit 52a9f4d213
  1. include/mcl/fp_tower.hpp (9 lines changed)
  2. include/mcl/op.hpp (2 lines changed)
  3. src/fp_generator.hpp (28 lines changed)
  4. src/fp_static_code.hpp (2 lines changed)

@ -113,24 +113,22 @@ public:
static void (*mod)(Fp& z, const FpDblT& xy); static void (*mod)(Fp& z, const FpDblT& xy);
static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
#else #else
static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
#endif #endif
/* /*
mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
*/ */
static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
static void (*sqrPre)(FpDblT& xx, const Fp& x); static void (*sqrPre)(FpDblT& xx, const Fp& x);
static void mulUnit(FpDblT& z, const FpDblT& x, Unit y) static void mulUnit(FpDblT& z, const FpDblT& x, Unit y)
{ {
@ -151,9 +149,8 @@ public:
if (addPre == 0) addPre = addPreC; if (addPre == 0) addPre = addPreC;
subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre); subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre);
if (subPre == 0) subPre = subPreC; if (subPre == 0) subPre = subPreC;
mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_);
if (mulPre == 0) mulPre = mulPreC;
#endif #endif
mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPre);
sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPre); sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPre);
} }
void operator+=(const FpDblT& x) { add(*this, *this, x); } void operator+=(const FpDblT& x) { add(*this, *this, x); }
@ -166,8 +163,8 @@ template<class Fp> void (*FpDblT<Fp>::sub)(FpDblT&, const FpDblT&, const FpDblT&
template<class Fp> void (*FpDblT<Fp>::mod)(Fp&, const FpDblT&); template<class Fp> void (*FpDblT<Fp>::mod)(Fp&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::addPre)(FpDblT&, const FpDblT&, const FpDblT&); template<class Fp> void (*FpDblT<Fp>::addPre)(FpDblT&, const FpDblT&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::subPre)(FpDblT&, const FpDblT&, const FpDblT&); template<class Fp> void (*FpDblT<Fp>::subPre)(FpDblT&, const FpDblT&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&);
#endif #endif
template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&);
template<class Fp> void (*FpDblT<Fp>::sqrPre)(FpDblT&, const Fp&); template<class Fp> void (*FpDblT<Fp>::sqrPre)(FpDblT&, const Fp&);
template<class Fp> struct Fp12T; template<class Fp> struct Fp12T;

@ -221,7 +221,6 @@ struct Op {
void2u fp2_sqrA_; void2u fp2_sqrA_;
void3u fpDbl_addA_; void3u fpDbl_addA_;
void3u fpDbl_subA_; void3u fpDbl_subA_;
void3u fpDbl_mulPreA_;
void2u fpDbl_modA_; void2u fpDbl_modA_;
void3u fp2Dbl_mulPreA_; void3u fp2Dbl_mulPreA_;
void2u fp2Dbl_sqrPreA_; void2u fp2Dbl_sqrPreA_;
@ -307,7 +306,6 @@ struct Op {
fp2_sqrA_ = 0; fp2_sqrA_ = 0;
fpDbl_addA_ = 0; fpDbl_addA_ = 0;
fpDbl_subA_ = 0; fpDbl_subA_ = 0;
fpDbl_mulPreA_ = 0;
fpDbl_modA_ = 0; fpDbl_modA_ = 0;
fp2Dbl_mulPreA_ = 0; fp2Dbl_mulPreA_ = 0;
fp2Dbl_sqrPreA_ = 0; fp2Dbl_sqrPreA_ = 0;

@ -421,11 +421,11 @@ private:
setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr()); setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr());
align(16); align(16);
op.fpDbl_mulPreA_ = gen_fpDbl_mulPre(); gen_fpDbl_mulPre(op.fpDbl_mulPre);
setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr()); setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr());
align(16); align(16);
op.fpDbl_sqrPre = gen_fpDbl_sqrPre(); gen_fpDbl_sqrPre(op.fpDbl_sqrPre);
setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr()); setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
align(16); align(16);
@ -2373,36 +2373,35 @@ private:
cmovc_rr(zp, keep); cmovc_rr(zp, keep);
store_mr(z, zp); store_mr(z, zp);
} }
void2u gen_fpDbl_sqrPre() void gen_fpDbl_sqrPre(void2u& f)
{ {
void2u func = getCurr<void2u>(); void2u func = getCurr<void2u>();
if (pn_ == 2 && useMulx_) { if (pn_ == 2 && useMulx_) {
StackFrame sf(this, 2, 7 | UseRDX); StackFrame sf(this, 2, 7 | UseRDX);
sqrPre2(sf.p[0], sf.p[1], sf.t); sqrPre2(sf.p[0], sf.p[1], sf.t);
return func; f = func;
} }
if (pn_ == 3) { if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t; Pack t = sf.t;
t.append(sf.p[2]); t.append(sf.p[2]);
sqrPre3(sf.p[0], sf.p[1], t); sqrPre3(sf.p[0], sf.p[1], t);
return func; f = func;
} }
if (pn_ == 4 && useMulx_) { if (pn_ == 4 && useMulx_) {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t; Pack t = sf.t;
t.append(sf.p[2]); t.append(sf.p[2]);
sqrPre4(sf.p[0], sf.p[1], t); sqrPre4(sf.p[0], sf.p[1], t);
return func; f = func;
} }
if (pn_ == 6 && useMulx_ && useAdx_) { if (pn_ == 6 && useMulx_ && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 6 * 8); StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
Pack t = sf.t; Pack t = sf.t;
t.append(sf.p[2]); t.append(sf.p[2]);
sqrPre6(sf.p[0], sf.p[1], t); sqrPre6(sf.p[0], sf.p[1], t);
return func; f = func;
} }
return 0;
#if 0 #if 0
#ifdef XBYAK64_WIN #ifdef XBYAK64_WIN
mov(r8, rdx); mov(r8, rdx);
@ -2413,18 +2412,18 @@ private:
return func; return func;
#endif #endif
} }
void3u gen_fpDbl_mulPre() void gen_fpDbl_mulPre(void3u& f)
{ {
void3u func = getCurr<void3u>(); void3u func = getCurr<void3u>();
if (pn_ == 2 && useMulx_) { if (pn_ == 2 && useMulx_) {
StackFrame sf(this, 3, 5 | UseRDX); StackFrame sf(this, 3, 5 | UseRDX);
mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t); mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t);
return func; f = func;
} }
if (pn_ == 3) { if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);
mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t); mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t);
return func; f = func;
} }
if (pn_ == 4) { if (pn_ == 4) {
/* /*
@ -2437,7 +2436,7 @@ private:
L(mulPreL); // called only from asm code L(mulPreL); // called only from asm code
mulPre4(gp0, gp1, gp2, sf.t); mulPre4(gp0, gp1, gp2, sf.t);
ret(); ret();
return func; f = func;
} }
if (pn_ == 6 && useAdx_) { if (pn_ == 6 && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 0, false); StackFrame sf(this, 3, 10 | UseRDX, 0, false);
@ -2446,9 +2445,8 @@ private:
L(mulPreL); // called only from asm code L(mulPreL); // called only from asm code
mulPre6(sf.t); mulPre6(sf.t);
ret(); ret();
return func; f = func;
} }
return 0;
} }
static inline void debug_put_inner(const uint64_t *ptr, int n) static inline void debug_put_inner(const uint64_t *ptr, int n)
{ {

@ -65,7 +65,7 @@ void setStaticCode(mcl::fp::Op& op)
op.fpDbl_subA_ = mclx_FpDbl_sub; op.fpDbl_subA_ = mclx_FpDbl_sub;
op.fpDbl_addPre = mclx_FpDbl_addPre; op.fpDbl_addPre = mclx_FpDbl_addPre;
op.fpDbl_subPre = mclx_FpDbl_subPre; op.fpDbl_subPre = mclx_FpDbl_subPre;
op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre; op.fpDbl_mulPre = mclx_FpDbl_mulPre;
op.fpDbl_sqrPre = mclx_FpDbl_sqrPre; op.fpDbl_sqrPre = mclx_FpDbl_sqrPre;
op.fp2_addA_ = mclx_Fp2_add; op.fp2_addA_ = mclx_Fp2_add;
op.fp2_subA_ = mclx_Fp2_sub; op.fp2_subA_ = mclx_Fp2_sub;

Loading…
Cancel
Save