add Fp2::add for bls12

dev
MITSUNARI Shigeo 6 years ago
parent 7c3a15b0f8
commit f4b4382433
  1. 36
      src/fp_generator.hpp
  2. 12
      test/bls12_test.cpp

@ -324,16 +324,14 @@ private:
if (func) { if (func) {
op.fp_sqrA_ = reinterpret_cast<void2u>(func); op.fp_sqrA_ = reinterpret_cast<void2u>(func);
} }
if (op.N > 4) return;
if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4 if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4
align(16); align(16);
op.fp_preInv = getCurr<int2u>(); op.fp_preInv = getCurr<int2u>();
gen_preInv(); gen_preInv();
} }
op.fp2_addA_ = gen_fp2_add();
if (op.N == 4 && !isFullBit_) { if (op.N == 4 && !isFullBit_) {
align(16);
op.fp2_addA_ = getCurr<void3u>();
gen_fp2_add4();
align(16); align(16);
op.fp2_subA_ = getCurr<void3u>(); op.fp2_subA_ = getCurr<void3u>();
gen_fp2_sub4(); gen_fp2_sub4();
@ -3505,6 +3503,36 @@ private:
gen_raw_fp_add(sf.p[0], sf.p[1], sf.p[2], sf.t, false); gen_raw_fp_add(sf.p[0], sf.p[1], sf.p[2], sf.t, false);
gen_raw_fp_add(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.p[2] + FpByte_, sf.t, false); gen_raw_fp_add(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.p[2] + FpByte_, sf.t, false);
} }
/*
	Emit the body of Fp2 addition for the 6-limb, non-full-bit case.
	The three pointer arguments are taken from sf.p[0..2] and named pz, px, py;
	presumably pz is the destination and px, py are the operands -- TODO confirm
	against gen_raw_fp_add6.
	gen_raw_fp_add6 is invoked twice, once with offset 0 and once with offset
	FpByte_; presumably these are the two Fp components of an Fp2 element.
	Must only be called when isFullBit_ is false (asserted below).
*/
void gen_fp2_add6()
{
assert(!isFullBit_);
// NOTE(review): arguments look like (3 parameter registers, 10 temporaries) -- confirm with StackFrame
StackFrame sf(this, 3, 10);
const Reg64& pz = sf.p[0];
const Reg64& px = sf.p[1];
const Reg64& py = sf.p[2];
// t1: the first six temporaries of the frame
Pack t1 = sf.t.sub(0, 6);
// t2: the remaining temporaries, extended below with rax and px
Pack t2 = sf.t.sub(6);
t2.append(rax);
t2.append(px); // px is part of the scratch pack, so it is destroyed after use
// keep a copy of px in xm0 because the first call may overwrite it
movq(xm0, px);
gen_raw_fp_add6(pz, px, py, 0, t1, t2, false);
// restore px for the second call; it is not restored again afterwards
movq(px, xm0);
gen_raw_fp_add6(pz, px, py, FpByte_, t1, t2, false);
}
/*
	Emit Fp2 addition at a 16-byte aligned position and return its entry point.
	Code is generated only when isFullBit_ is false and pn_ is 4 or 6;
	for every other configuration nothing is emitted and 0 is returned.
*/
void3u gen_fp2_add()
{
	align(16);
	void3u entry = getCurr<void3u>();
	if (!isFullBit_) {
		switch (pn_) {
		case 4:
			gen_fp2_add4();
			return entry;
		case 6:
			gen_fp2_add6();
			return entry;
		default:
			break;
		}
	}
	// unsupported configuration: no generated code is available
	return 0;
}
void gen_fp2_sub4() void gen_fp2_sub4()
{ {
assert(!isFullBit_); assert(!isFullBit_);

@ -687,6 +687,11 @@ int main(int argc, char *argv[])
} }
FpDbl dx; FpDbl dx;
FpDbl::mulPre(dx, xv[0], yv[0]); FpDbl::mulPre(dx, xv[0], yv[0]);
Fp2 x2, y2;
x2.a.setByCSPRNG(rg);
x2.b.setByCSPRNG(rg);
y2.a.setByCSPRNG(rg);
y2.b.setByCSPRNG(rg);
if(0){ if(0){
puts("----------"); puts("----------");
xv[0].dump(); xv[0].dump();
@ -695,12 +700,13 @@ if(0){
puts("----------"); puts("----------");
// exit(1); // exit(1);
} }
// CYBOZU_BENCH_C("subDbl", 10000000, FpDbl::sub, dx, dx, dx); CYBOZU_BENCH_C("Fp2::add", 10000000, Fp2::add, x2, x2, y2);
CYBOZU_BENCH_C("Fp2::sub", 10000000, Fp2::sub, x2, x2, y2);
// CYBOZU_BENCH_C("mulPre", 100000000, FpDbl::mulPre, dx, xv[0], yv[0]); // CYBOZU_BENCH_C("mulPre", 100000000, FpDbl::mulPre, dx, xv[0], yv[0]);
// CYBOZU_BENCH_C("sqrPre", 100000000, FpDbl::sqrPre, dx, xv[0]); // CYBOZU_BENCH_C("sqrPre", 100000000, FpDbl::sqrPre, dx, xv[0]);
// CYBOZU_BENCH_C("mod ", 100000000, FpDbl::mod, xv[0], dx); // CYBOZU_BENCH_C("mod ", 100000000, FpDbl::mod, xv[0], dx);
CYBOZU_BENCH_C("mul ", 100000000, Fp::mul, xv[0], yv[0], xv[0]); // CYBOZU_BENCH_C("mul ", 100000000, Fp::mul, xv[0], yv[0], xv[0]);
CYBOZU_BENCH_C("sqr ", 100000000, Fp::sqr, xv[0], xv[0]); // CYBOZU_BENCH_C("sqr ", 100000000, Fp::sqr, xv[0], xv[0]);
return 0; return 0;
#endif #endif
return cybozu::test::autoRun.run(argc, argv); return cybozu::test::autoRun.run(argc, argv);

Loading…
Cancel
Save