update-fork
MITSUNARI Shigeo 4 years ago
parent bcf5961f70
commit 40343a4a69
  1. 4
      include/mcl/op.hpp
  2. 31
      src/fp_generator.hpp
  3. 4
      src/fp_static_code.hpp
  4. 2
      test/bench.hpp

@ -232,12 +232,12 @@ struct Op {
void1u fp_clear; void1u fp_clear;
void2u fp_copy; void2u fp_copy;
void2u fp_shr1; void2u fp_shr1;
void3u fp_mul2;
void3u fp_neg; void3u fp_neg;
void4u fp_add; void4u fp_add;
void4u fp_sub; void4u fp_sub;
void4u fp_mul; void4u fp_mul;
void3u fp_sqr; void3u fp_sqr;
void3u fp_mul2;
void2uOp fp_invOp; void2uOp fp_invOp;
void2uIu fp_mulUnit; // fpN1_mod + fp_mulUnitPre void2uIu fp_mulUnit; // fpN1_mod + fp_mulUnitPre
@ -319,12 +319,12 @@ struct Op {
fp_clear = 0; fp_clear = 0;
fp_copy = 0; fp_copy = 0;
fp_shr1 = 0; fp_shr1 = 0;
fp_mul2 = 0;
fp_neg = 0; fp_neg = 0;
fp_add = 0; fp_add = 0;
fp_sub = 0; fp_sub = 0;
fp_mul = 0; fp_mul = 0;
fp_sqr = 0; fp_sqr = 0;
fp_mul2 = 0;
fp_invOp = 0; fp_invOp = 0;
fp_mulUnit = 0; fp_mulUnit = 0;

@ -379,6 +379,10 @@ private:
op.fp_shr1 = gen_shr1(); op.fp_shr1 = gen_shr1();
setFuncInfo(prof_, suf, "_shr1", op.fp_shr1, getCurr()); setFuncInfo(prof_, suf, "_shr1", op.fp_shr1, getCurr());
align(16);
op.fp_mul2A_ = gen_mul2();
setFuncInfo(prof_, suf, "_mul2", op.fp_mul2A_, getCurr());
align(16); align(16);
op.fp_negA_ = gen_fp_neg(); op.fp_negA_ = gen_fp_neg();
setFuncInfo(prof_, suf, "_neg", op.fp_negA_, getCurr()); setFuncInfo(prof_, suf, "_neg", op.fp_negA_, getCurr());
@ -915,6 +919,33 @@ private:
mov(ptr [pz + (pn_ - 1) * 8], *t0); mov(ptr [pz + (pn_ - 1) * 8], *t0);
return func; return func;
} }
/*
	Emit machine code at the current position and return a pointer to it as a
	void2u (first stack-frame argument = destination, second = source).
	Returns 0 without emitting anything when isFullBit_ is set or pn_ is
	neither 4 nor 6.

	The emitted code loads pn_ 64-bit limbs from the source, shifts the whole
	multi-limb value left by one bit, subtracts the value stored at pL_ from a
	copy, and keeps the unsubtracted value when the subtraction borrows.
	Presumably this is z = (2 * x) mod p with p stored at pL_ -- the name and
	the call site suggest it, but pL_ is not defined in this view; TODO confirm.
*/
void2u gen_mul2()
{
// NOTE(review): presumably skipped for full-bit moduli because the 1-bit
// shift below discards the bit shifted out of the top limb -- confirm.
if (isFullBit_) return 0;
// Only the 4-limb and 6-limb cases are generated.
if (!(pn_ == 4 || pn_ == 6)) return 0;
void2u func = getCurr<void2u>();
// Temporaries requested from StackFrame: pn_ for x plus (pn_ - 2) for t.
// The remaining two registers of t are sf.p[1] and rax (appended below).
const int n = pn_ * 2 - 2;
StackFrame sf(this, 2, n);
Pack x = sf.t.sub(0, pn_);
load_rm(x, sf.p[1]);
#if 0
// Disabled alternative: double by adding x to itself.
add_rr(x, x);
#else
// Shift the multi-limb value left by one bit. Walk from the top limb down so
// that each shld still sees the not-yet-shifted lower neighbour, whose top bit
// becomes the new low bit of x[i].
for (int i = pn_ - 1; i > 0; i--) {
shld(x[i], x[i - 1], 1);
}
shl(x[0], 1);
#endif
Pack t = sf.t.sub(pn_, n - pn_);
// sf.p[1] (the source pointer) is not read again in this function after the
// load_rm above, so its register is reused as a temporary here.
t.append(sf.p[1]);
t.append(rax); // destroy last
mov_rr(t, x);
// NOTE(review): rax is the last register of t and has just been written by
// mov_rr, yet the lea below overwrites it before sub_rm(t, rax) runs. If
// sub_rm subtracts limb-wise into every register of t, the top limb (and so
// the carry tested by cmovc_rr) would be computed from the address rather
// than from the top limb of x. Verify against sub_rm's implementation, or
// use a separate register for the address.
lea(rax, ptr[rip + pL_]);
sub_rm(t, rax);
// On carry (borrow) from the subtraction, restore the unsubtracted value.
cmovc_rr(t, x);
store_mr(sf.p[0], t);
return func;
}
void3u gen_mul() void3u gen_mul()
{ {
void3u func = getCurr<void3u>(); void3u func = getCurr<void3u>();

@ -23,6 +23,7 @@ void mclx_Fp_neg(Unit*, const Unit*);
void mclx_FpDbl_mod(Unit*, const Unit*); void mclx_FpDbl_mod(Unit*, const Unit*);
void mclx_Fp_mul(Unit*, const Unit*, const Unit*); void mclx_Fp_mul(Unit*, const Unit*, const Unit*);
void mclx_Fp_sqr(Unit*, const Unit*); void mclx_Fp_sqr(Unit*, const Unit*);
void mclx_Fp_mul2(Unit*, const Unit*);
void mclx_FpDbl_add(Unit*, const Unit*, const Unit*); void mclx_FpDbl_add(Unit*, const Unit*, const Unit*);
void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*); void mclx_FpDbl_sub(Unit*, const Unit*, const Unit*);
int mclx_Fp_preInv(Unit*, const Unit*); int mclx_Fp_preInv(Unit*, const Unit*);
@ -45,6 +46,7 @@ void mclx_Fr_shr1(Unit*, const Unit*);
void mclx_Fr_neg(Unit*, const Unit*); void mclx_Fr_neg(Unit*, const Unit*);
void mclx_Fr_mul(Unit*, const Unit*, const Unit*); void mclx_Fr_mul(Unit*, const Unit*, const Unit*);
void mclx_Fr_sqr(Unit*, const Unit*); void mclx_Fr_sqr(Unit*, const Unit*);
void mclx_Fr_mul2(Unit*, const Unit*);
int mclx_Fr_preInv(Unit*, const Unit*); int mclx_Fr_preInv(Unit*, const Unit*);
} // extern "C" } // extern "C"
@ -61,6 +63,7 @@ void setStaticCode(mcl::fp::Op& op)
op.fpDbl_modA_ = mclx_FpDbl_mod; op.fpDbl_modA_ = mclx_FpDbl_mod;
op.fp_mulA_ = mclx_Fp_mul; op.fp_mulA_ = mclx_Fp_mul;
op.fp_sqrA_ = mclx_Fp_sqr; op.fp_sqrA_ = mclx_Fp_sqr;
op.fp_mul2A_ = mclx_Fp_mul2;
op.fpDbl_addA_ = mclx_FpDbl_add; op.fpDbl_addA_ = mclx_FpDbl_add;
op.fpDbl_subA_ = mclx_FpDbl_sub; op.fpDbl_subA_ = mclx_FpDbl_sub;
op.fpDbl_addPre = mclx_FpDbl_addPre; op.fpDbl_addPre = mclx_FpDbl_addPre;
@ -85,6 +88,7 @@ void setStaticCode(mcl::fp::Op& op)
op.fp_negA_ = mclx_Fr_neg; op.fp_negA_ = mclx_Fr_neg;
op.fp_mulA_ = mclx_Fr_mul; op.fp_mulA_ = mclx_Fr_mul;
op.fp_sqrA_ = mclx_Fr_sqr; op.fp_sqrA_ = mclx_Fr_sqr;
op.fp_mul2A_ = mclx_Fr_mul2;
op.fp_preInv = mclx_Fr_preInv; op.fp_preInv = mclx_Fr_preInv;
} }
op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_); op.fp_mul = fp::func_ptr_cast<void4u>(op.fp_mulA_);

@ -116,6 +116,8 @@ void testBench(const G1& P, const G2& Q)
CYBOZU_BENCH_C("Fr::add ", C3, Fr::add, a, a, b); CYBOZU_BENCH_C("Fr::add ", C3, Fr::add, a, a, b);
CYBOZU_BENCH_C("Fr::sub ", C3, Fr::sub, a, a, b); CYBOZU_BENCH_C("Fr::sub ", C3, Fr::sub, a, a, b);
CYBOZU_BENCH_C("Fr::neg ", C3, Fr::neg, a, a); CYBOZU_BENCH_C("Fr::neg ", C3, Fr::neg, a, a);
CYBOZU_BENCH_C("Fr::add 2 ", C3, Fr::add, a, a, b);
CYBOZU_BENCH_C("Fr::mul2 ", C3, Fr::mul2, a, a);
CYBOZU_BENCH_C("Fr::mul ", C3, Fr::mul, a, a, b); CYBOZU_BENCH_C("Fr::mul ", C3, Fr::mul, a, a, b);
CYBOZU_BENCH_C("Fr::sqr ", C3, Fr::sqr, a, a); CYBOZU_BENCH_C("Fr::sqr ", C3, Fr::sqr, a, a);
CYBOZU_BENCH_C("Fr::inv ", C3, invAdd, a, a, b); CYBOZU_BENCH_C("Fr::inv ", C3, invAdd, a, a, b);

Loading…
Cancel
Save