under construction of gen_fpDbl_add

dev
MITSUNARI Shigeo 9 years ago
parent 1a70a051c1
commit 00e7944894
  1. 34
      include/mcl/fp_generator.hpp
  2. 7
      include/mcl/fp_tower.hpp

@ -207,6 +207,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
op.fp_subNC = getCurr<void3u>();
gen_addSubNC(false, pn_);
}
align(16);
op.fp_neg = getCurr<void2u>();
gen_neg();
@ -223,16 +224,31 @@ struct FpGenerator : Xbyak::CodeGenerator {
align(16);
shr1_ = getCurr<void2op>();
gen_shr1();
if (op.N <= 4) { // support general op.N but not fast for op.N > 4
align(16);
op.fp_preInv = getCurr<int2u>();
gen_preInv();
}
// setup fp_tower
if (op.N > 4) return;
align(16);
// op.fpDbl_add = getCurr<void3u>();
// gen_fpDbl_add();
if (op.isFullBit) {
// op.fpDbl_addNC = op.fpDbl_add;
} else {
align(16);
op.fpDbl_addNC = getCurr<void3u>();
gen_addSubNC(true, pn_ * 2);
align(16);
op.fpDbl_subNC = getCurr<void3u>();
gen_addSubNC(false, pn_ * 2);
}
if (op.N == 3 || op.N == 4) {
align(16);
op.fp_mod = getCurr<void2u>();
gen_montRed();
}
if (op.N <= 4) { // support general op.N but not fast
align(16);
op.fp_preInv = getCurr<int2u>();
gen_preInv();
}
}
void gen_addSubNC(bool isAdd, int n)
{
@ -494,6 +510,14 @@ struct FpGenerator : Xbyak::CodeGenerator {
#endif
outLocalLabel();
}
void gen_fpDbl_add()
{
StackFrame sf(this, 3, 0);
const Reg64& pz = sf.p[0];
const Reg64& px = sf.p[1];
const Reg64& py = sf.p[2];
gen_raw_add(pz, px, py, rax, pn_);
}
void gen_sub()
{
if (pn_ <= 4) {

@ -44,8 +44,11 @@ public:
// z <- sub(x, y): forwards the v_ members of z, x, y to Fp::op_.fpDbl_sub.
static inline void sub(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_);
}
// z <- addNC(x, y): forwards the v_ members of z, x, y to Fp::op_.fpDbl_addNC.
static inline void addNC(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_addNC(z.v_, x.v_, y.v_);
}
// z <- subNC(x, y): forwards the v_ members of z, x, y to Fp::op_.fpDbl_subNC.
static inline void subNC(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
	Fp::op_.fpDbl_subNC(z.v_, x.v_, y.v_);
}
// Forwards (z.v_, x.v_, y.v_) to Fp::op_.fp_mulPre; the result is written through z.
static inline void mulPre(FpDblT& z, const Fp& x, const Fp& y)
{
	Fp::op_.fp_mulPre(z.v_, x.v_, y.v_);
}
// Forwards (y.v_, x.v_) to Fp::op_.fp_mod; the result is written through y.
static inline void mod(Fp& y, const FpDblT& x)
{
	Fp::op_.fp_mod(y.v_, x.v_);
}
/*
mul(z, x, y) is equivalent to mulPre(xy, x, y) followed by mod(z, xy)
*/
// Forwards (xy.v_, x.v_, y.v_) to Fp::op_.fp_mulPre; the result is written through xy.
static inline void mulPre(FpDblT& xy, const Fp& x, const Fp& y)
{
	Fp::op_.fp_mulPre(xy.v_, x.v_, y.v_);
}
// Forwards (z.v_, xy.v_) to Fp::op_.fp_mod; the result is written through z.
static inline void mod(Fp& z, const FpDblT& xy)
{
	Fp::op_.fp_mod(z.v_, xy.v_);
}
};
/*

Loading…
Cancel
Save