From 00e7944894f1307bf2f451d66d4be98677f25009 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 7 Mar 2016 07:17:16 +0900 Subject: [PATCH] under construction of gen_fpDbl_add --- include/mcl/fp_generator.hpp | 34 +++++++++++++++++++++++++++++----- include/mcl/fp_tower.hpp | 7 +++++-- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/mcl/fp_generator.hpp b/include/mcl/fp_generator.hpp index 65f89e8..746555c 100644 --- a/include/mcl/fp_generator.hpp +++ b/include/mcl/fp_generator.hpp @@ -207,6 +207,7 @@ struct FpGenerator : Xbyak::CodeGenerator { op.fp_subNC = getCurr(); gen_addSubNC(false, pn_); } + align(16); op.fp_neg = getCurr(); gen_neg(); @@ -223,16 +224,31 @@ struct FpGenerator : Xbyak::CodeGenerator { align(16); shr1_ = getCurr(); gen_shr1(); + if (op.N <= 4) { // support general op.N but not fast for op.N > 4 + align(16); + op.fp_preInv = getCurr(); + gen_preInv(); + } + // setup fp_tower + if (op.N > 4) return; + align(16); +// op.fpDbl_add = getCurr(); +// gen_fpDbl_add(); + if (op.isFullBit) { +// op.fpDbl_addNC = op.fpDbl_add; + } else { + align(16); + op.fpDbl_addNC = getCurr(); + gen_addSubNC(true, pn_ * 2); + align(16); + op.fpDbl_subNC = getCurr(); + gen_addSubNC(false, pn_ * 2); + } if (op.N == 3 || op.N == 4) { align(16); op.fp_mod = getCurr(); gen_montRed(); } - if (op.N <= 4) { // support general op.N but not fast - align(16); - op.fp_preInv = getCurr(); - gen_preInv(); - } } void gen_addSubNC(bool isAdd, int n) { @@ -494,6 +510,14 @@ struct FpGenerator : Xbyak::CodeGenerator { #endif outLocalLabel(); } + void gen_fpDbl_add() + { + StackFrame sf(this, 3, 0); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + const Reg64& py = sf.p[2]; + gen_raw_add(pz, px, py, rax, pn_); + } void gen_sub() { if (pn_ <= 4) { diff --git a/include/mcl/fp_tower.hpp b/include/mcl/fp_tower.hpp index 83d461a..24b2d6c 100644 --- a/include/mcl/fp_tower.hpp +++ b/include/mcl/fp_tower.hpp @@ -44,8 +44,11 @@ public: static inline void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_); } static inline void addNC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addNC(z.v_, x.v_, y.v_); } static inline void subNC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subNC(z.v_, x.v_, y.v_); } - static inline void mulPre(FpDblT& z, const Fp& x, const Fp& y) { Fp::op_.fp_mulPre(z.v_, x.v_, y.v_); } - static inline void mod(Fp& y, const FpDblT& x) { Fp::op_.fp_mod(y.v_, x.v_); } + /* + mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) + */ + static inline void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fp_mulPre(xy.v_, x.v_, y.v_); } + static inline void mod(Fp& z, const FpDblT& xy) { Fp::op_.fp_mod(z.v_, xy.v_); } }; /*