Merge branch 'dev'

update-fork
MITSUNARI Shigeo 4 years ago
commit 19616e15f7
  1. 20
      include/mcl/fp_tower.hpp
  2. 4
      include/mcl/op.hpp
  3. 1
      src/fp.cpp
  4. 30
      src/fp_generator.hpp
  5. 4
      src/fp_static_code.hpp

@ -125,8 +125,6 @@ public:
/*
	Forward to the fpDbl_addPre routine stored in Fp::op_,
	handing it the v_ storage of z, x and y.
	NOTE(review): the name suggests z = x + y without modular reduction - confirm.
*/
static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_);
}
/*
	Forward to the fpDbl_subPre routine stored in Fp::op_,
	handing it the v_ storage of z, x and y.
	NOTE(review): the name suggests z = x - y without modular reduction - confirm.
*/
static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_);
}
#endif
/*
	Call Fp::op_.fpDbl_mulPre on the v_ storage of the operands;
	the result is written through xy.v_.
	NOTE(review): presumably xy = x * y kept unreduced - confirm.
*/
static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y)
{
	Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_);
}
/*
	Call Fp::op_.fpDbl_sqrPre on the v_ storage of the operands;
	the result is written through xx.v_.
	NOTE(review): presumably xx = x * x kept unreduced - confirm.
*/
static void sqrPreC(FpDblT& xx, const Fp& x)
{
	Fp::op_.fpDbl_sqrPre(xx.v_, x.v_);
}
/*
mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
*/
@ -152,16 +150,8 @@ public:
subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre);
if (subPre == 0) subPre = subPreC;
#endif
if (op.fpDbl_mulPreA_) {
mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_);
} else {
mulPre = mulPreC;
}
if (op.fpDbl_sqrPreA_) {
sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPreA_);
} else {
sqrPre = sqrPreC;
}
mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPre);
sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPre);
}
void operator+=(const FpDblT& x) { add(*this, *this, x); }
void operator-=(const FpDblT& x) { sub(*this, *this, x); }
@ -690,9 +680,9 @@ struct Fp2DblT {
mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_);
} else {
if (op.isFullBit) {
mulPre = fp2Dbl_mulPreW<true>;
mulPre = fp2Dbl_mulPreTW<true>;
} else {
mulPre = fp2Dbl_mulPreW<false>;
mulPre = fp2Dbl_mulPreTW<false>;
}
}
if (op.fp2Dbl_sqrPreA_) {
@ -710,7 +700,7 @@ struct Fp2DblT {
@note mod of NIST_P192 is fast
*/
template<bool isFullBit>
static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y)
static void fp2Dbl_mulPreTW(Fp2DblT& z, const Fp2& x, const Fp2& y)
{
const Fp& a = x.a;
const Fp& b = x.b;

@ -221,8 +221,6 @@ struct Op {
void2u fp2_sqrA_;
void3u fpDbl_addA_;
void3u fpDbl_subA_;
void3u fpDbl_mulPreA_;
void2u fpDbl_sqrPreA_;
void2u fpDbl_modA_;
void3u fp2Dbl_mulPreA_;
void2u fp2Dbl_sqrPreA_;
@ -308,8 +306,6 @@ struct Op {
fp2_sqrA_ = 0;
fpDbl_addA_ = 0;
fpDbl_subA_ = 0;
fpDbl_mulPreA_ = 0;
fpDbl_sqrPreA_ = 0;
fpDbl_modA_ = 0;
fp2Dbl_mulPreA_ = 0;
fp2Dbl_sqrPreA_ = 0;

@ -426,6 +426,7 @@ void setWasmOp(Op& op)
op.fp_mul = mcl::mulMontT<N>;
op.fp_sqr = mcl::sqrMontT<N>;
op.fpDbl_mulPre = mulT<N>;
// op.fpDbl_sqrPre = sqrT<N>;
op.fpDbl_mod = modT<N>;
}
#endif

@ -421,12 +421,12 @@ private:
setFuncInfo(prof_, suf, "Dbl_subPre", op.fpDbl_subPre, getCurr());
align(16);
op.fpDbl_mulPreA_ = gen_fpDbl_mulPre();
setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPreA_, getCurr());
gen_fpDbl_mulPre(op.fpDbl_mulPre);
setFuncInfo(prof_, suf, "Dbl_mulPre", op.fpDbl_mulPre, getCurr());
align(16);
op.fpDbl_sqrPreA_ = gen_fpDbl_sqrPre();
setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPreA_, getCurr());
gen_fpDbl_sqrPre(op.fpDbl_sqrPre);
setFuncInfo(prof_, suf, "Dbl_sqrPre", op.fpDbl_sqrPre, getCurr());
align(16);
op.fp2_addA_ = gen_fp2_add();
@ -2373,36 +2373,35 @@ private:
cmovc_rr(zp, keep);
store_mr(z, zp);
}
void2u gen_fpDbl_sqrPre()
void gen_fpDbl_sqrPre(void2u& f)
{
void2u func = getCurr<void2u>();
if (pn_ == 2 && useMulx_) {
StackFrame sf(this, 2, 7 | UseRDX);
sqrPre2(sf.p[0], sf.p[1], sf.t);
return func;
f = func;
}
if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t;
t.append(sf.p[2]);
sqrPre3(sf.p[0], sf.p[1], t);
return func;
f = func;
}
if (pn_ == 4 && useMulx_) {
StackFrame sf(this, 3, 10 | UseRDX);
Pack t = sf.t;
t.append(sf.p[2]);
sqrPre4(sf.p[0], sf.p[1], t);
return func;
f = func;
}
if (pn_ == 6 && useMulx_ && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 6 * 8);
Pack t = sf.t;
t.append(sf.p[2]);
sqrPre6(sf.p[0], sf.p[1], t);
return func;
f = func;
}
return 0;
#if 0
#ifdef XBYAK64_WIN
mov(r8, rdx);
@ -2413,18 +2412,18 @@ private:
return func;
#endif
}
void3u gen_fpDbl_mulPre()
void gen_fpDbl_mulPre(void3u& f)
{
void3u func = getCurr<void3u>();
if (pn_ == 2 && useMulx_) {
StackFrame sf(this, 3, 5 | UseRDX);
mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t);
return func;
f = func;
}
if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX);
mulPre3(sf.p[0], sf.p[1], sf.p[2], sf.t);
return func;
f = func;
}
if (pn_ == 4) {
/*
@ -2437,7 +2436,7 @@ private:
L(mulPreL); // called only from asm code
mulPre4(gp0, gp1, gp2, sf.t);
ret();
return func;
f = func;
}
if (pn_ == 6 && useAdx_) {
StackFrame sf(this, 3, 10 | UseRDX, 0, false);
@ -2446,9 +2445,8 @@ private:
L(mulPreL); // called only from asm code
mulPre6(sf.t);
ret();
return func;
f = func;
}
return 0;
}
static inline void debug_put_inner(const uint64_t *ptr, int n)
{

@ -65,8 +65,8 @@ void setStaticCode(mcl::fp::Op& op)
op.fpDbl_subA_ = mclx_FpDbl_sub;
op.fpDbl_addPre = mclx_FpDbl_addPre;
op.fpDbl_subPre = mclx_FpDbl_subPre;
op.fpDbl_mulPreA_ = mclx_FpDbl_mulPre;
op.fpDbl_sqrPreA_ = mclx_FpDbl_sqrPre;
op.fpDbl_mulPre = mclx_FpDbl_mulPre;
op.fpDbl_sqrPre = mclx_FpDbl_sqrPre;
op.fp2_addA_ = mclx_Fp2_add;
op.fp2_subA_ = mclx_Fp2_sub;
op.fp2_negA_ = mclx_Fp2_neg;

Loading…
Cancel
Save