Merge branch 'dev'

update-fork
MITSUNARI Shigeo 4 years ago
commit 19616e15f7
  1. 20
      include/mcl/fp_tower.hpp
  2. 4
      include/mcl/op.hpp
  3. 1
      src/fp.cpp
  4. 30
      src/fp_generator.hpp
  5. 4
      src/fp_static_code.hpp

@ -125,8 +125,6 @@ public:
static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
#endif #endif
static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
static void sqrPreC(FpDblT& xx, const Fp& x) { Fp::op_.fpDbl_sqrPre(xx.v_, x.v_); }
/* /*
mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
*/ */
@ -152,16 +150,8 @@ public:
subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre); subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre);
if (subPre == 0) subPre = subPreC; if (subPre == 0) subPre = subPreC;
#endif #endif
if (op.fpDbl_mulPreA_) { mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPre);
mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_); sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPre);
} else {
mulPre = mulPreC;
}
if (op.fpDbl_sqrPreA_) {
sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPreA_);
} else {
sqrPre = sqrPreC;
}
} }
void operator+=(const FpDblT& x) { add(*this, *this, x); } void operator+=(const FpDblT& x) { add(*this, *this, x); }
void operator-=(const FpDblT& x) { sub(*this, *this, x); } void operator-=(const FpDblT& x) { sub(*this, *this, x); }
@ -690,9 +680,9 @@ struct Fp2DblT {
mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_); mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_);
} else { } else {
if (op.isFullBit) { if (op.isFullBit) {
mulPre = fp2Dbl_mulPreW<true>; mulPre = fp2Dbl_mulPreTW<true>;
} else { } else {
mulPre = fp2Dbl_mulPreW<false>; mulPre = fp2Dbl_mulPreTW<false>;
} }
} }
if (op.fp2Dbl_sqrPreA_) { if (op.fp2Dbl_sqrPreA_) {
@ -710,7 +700,7 @@ struct Fp2DblT {
@note mod of NIST_P192 is fast @note mod of NIST_P192 is fast
*/ */
template<bool isFullBit> template<bool isFullBit>
static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y) static void fp2Dbl_mulPreTW(Fp2DblT& z, const Fp2& x, const Fp2& y)
{ {
const Fp& a = x.a; const Fp& a = x.a;
const Fp& b = x.b; const Fp& b = x.b;

@ -221,8 +221,6 @@ struct Op {
void2u fp2_sqrA_; void2u fp2_sqrA_;
void3u fpDbl_addA_; void3u fpDbl_addA_;
void3u fpDbl_subA_; void3u fpDbl_subA_;
void3u fpDbl_mulPreA_;
void2u fpDbl_sqrPreA_;
void2u fpDbl_modA_; void2u fpDbl_modA_;
void3u fp2Dbl_mulPreA_; void3u fp2Dbl_mulPreA_;
void2u fp2Dbl_sqrPreA_; void2u fp2Dbl_sqrPreA_;
@ -308,8 +306,6 @@ struct Op {
fp2_sqrA_ = 0; fp2_sqrA_ = 0;
fpDbl_addA_ = 0; fpDbl_addA_ = 0;
fpDbl_subA_ = 0; fpDbl_subA_ = 0;
fpDbl_mulPreA_ = 0;
fpDbl_sqrPreA_ = 0;
fpDbl_modA_ = 0; fpDbl_modA_ = 0;
fp2Dbl_mulPreA_ = 0; fp2Dbl_mulPreA_ = 0;
fp2Dbl_sqrPreA_ = 0; fp2Dbl_sqrPreA_ = 0;

@ -426,6 +426,7 @@ void setWasmOp(Op& op)
op.fp_mul = mcl::mulMontT<N>; op.fp_mul = mcl::mulMontT<N>;
op.fp_sqr = mcl::sqrMontT<N>; op.fp_sqr = mcl::sqrMontT<N>;
op.fpDbl_mulPre = mulT<N>; op.fpDbl_mulPre = mulT<N>;
// op.fpDbl_sqrPre = sqrT<N>;
op.fpDbl_mod = modT<N>; op.fpDbl_mod = modT<N>;
} }
#endif #endif

@ -421,12 +421,12 @@ private:
setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr()); setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr());
align(16); align(16);
op.fpDbl_mulPreA_ = gen_fpDbl_mulPre(); gen_fpDbl_mulPre(op.fpDbl_mulPre);
setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr()); setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr());
align(16); align(16);
op.fpDbl_sqrPreA_ = gen_fpDbl_sqrPre(); gen_fpDbl_sqrPre(op.fpDbl_sqrPre);
setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPreA_, getCurr()); setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
align(16); align(16);
op.fp2_addA_ = gen_fp2_add(); op.fp2_addA_ = gen_fp2_add();
@ -2373,36 +2373,35 @@ private:
cmovc_rr(zp, keep); cmovc_rr(zp, keep);
store_mr(z, zp); store_mr(z, zp);
} }
void2u gen_fpDbl_sqrPre() void gen_fpDbl_sqrPre(void2u& f)
{ {
void2u func = getCurr<void2u>(); void2u func = getCurr<void2u>();
if (pn_ == 2 && useMulx_) { if (pn_ == 2 && useMulx_) {
StackFrame sf(this, 2, 7 | UseRDX); StackFrame sf(this, 2, 7 | UseRDX);
sqrPre2(sf.p[0], sf.p[1], sf.t); sqrPre2(sf.p[0], sf.p[1], sf.t);
return func; f = func;
} }
if (pn_ == 3) { if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t; Pack t = sf.t;
t.append(sf.p[2]); t.append(sf.p[2]);
sqrPre3(sf.p[0], sf.p[1], t); sqrPre3(sf.p[0], sf.p[1], t);
return func; f = func;
} }
if (pn_ == 4 && useMulx_) { if (pn_ == 4 && useMulx_) {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t; Pack t = sf.t;
t.append(sf.p[2]); t.append(sf.p[2]);
sqrPre4(sf.p[0], sf.p[1], t); sqrPre4(sf.p[0], sf.p[1], t);
return func; f = func;
} }
if (pn_ == 6 && useMulx_ && useAdx_) { if (pn_ == 6 && useMulx_ && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 6 * 8); StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
Pack t = sf.t; Pack t = sf.t;
t.append(sf.p[2]); t.append(sf.p[2]);
sqrPre6(sf.p[0], sf.p[1], t); sqrPre6(sf.p[0], sf.p[1], t);
return func; f = func;
} }
return 0;
#if 0 #if 0
#ifdef XBYAK64_WIN #ifdef XBYAK64_WIN
mov(r8, rdx); mov(r8, rdx);
@ -2413,18 +2412,18 @@ private:
return func; return func;
#endif #endif
} }
void3u gen_fpDbl_mulPre() void gen_fpDbl_mulPre(void3u& f)
{ {
void3u func = getCurr<void3u>(); void3u func = getCurr<void3u>();
if (pn_ == 2 && useMulx_) { if (pn_ == 2 && useMulx_) {
StackFrame sf(this, 3, 5 | UseRDX); StackFrame sf(this, 3, 5 | UseRDX);
mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t); mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t);
return func; f = func;
} }
if (pn_ == 3) { if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);
mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t); mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t);
return func; f = func;
} }
if (pn_ == 4) { if (pn_ == 4) {
/* /*
@ -2437,7 +2436,7 @@ private:
L(mulPreL); // called only from asm code L(mulPreL); // called only from asm code
mulPre4(gp0, gp1, gp2, sf.t); mulPre4(gp0, gp1, gp2, sf.t);
ret(); ret();
return func; f = func;
} }
if (pn_ == 6 && useAdx_) { if (pn_ == 6 && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 0, false); StackFrame sf(this, 3, 10 | UseRDX, 0, false);
@ -2446,9 +2445,8 @@ private:
L(mulPreL); // called only from asm code L(mulPreL); // called only from asm code
mulPre6(sf.t); mulPre6(sf.t);
ret(); ret();
return func; f = func;
} }
return 0;
} }
static inline void debug_put_inner(const uint64_t *ptr, int n) static inline void debug_put_inner(const uint64_t *ptr, int n)
{ {

@ -65,8 +65,8 @@ void setStaticCode(mcl::fp::Op& op)
op.fpDbl_subA_ = mclx_FpDbl_sub; op.fpDbl_subA_ = mclx_FpDbl_sub;
op.fpDbl_addPre = mclx_FpDbl_addPre; op.fpDbl_addPre = mclx_FpDbl_addPre;
op.fpDbl_subPre = mclx_FpDbl_subPre; op.fpDbl_subPre = mclx_FpDbl_subPre;
op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre; op.fpDbl_mulPre = mclx_FpDbl_mulPre;
op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre; op.fpDbl_sqrPre = mclx_FpDbl_sqrPre;
op.fp2_addA_ = mclx_Fp2_add; op.fp2_addA_ = mclx_Fp2_add;
op.fp2_subA_ = mclx_Fp2_sub; op.fp2_subA_ = mclx_Fp2_sub;
op.fp2_negA_ = mclx_Fp2_neg; op.fp2_negA_ = mclx_Fp2_neg;

Loading…
Cancel
Save