add fp2_mul_xi for xi_a = 1

dev
MITSUNARI Shigeo 6 years ago
parent 7e09cd5762
commit eb481b1e3e
  1. 21
      include/mcl/fp_tower.hpp
  2. 2
      include/mcl/op.hpp
  3. 17
      src/fp_generator.hpp

@ -242,9 +242,9 @@ public:
// Arithmetic entry points stored as static function pointers, so the
// implementation behind each one can be selected at run time by the init code
// (e.g. sqr is taken from op.fp2_sqrA_ when that is set, otherwise fp2_sqrC).
static void (*neg)(Fp2T& y, const Fp2T& x);
static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
static void (*sqr)(Fp2T& y, const Fp2T& x);
// Multiplication by xi; the init code binds this to op.fp2_mul_xi.
static void (*mul_xi)(Fp2T& y, const Fp2T& x);
// Component-wise Fp::addPre: z.a is produced from (x.a, y.a) and z.b from (x.b, y.b).
static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y)
{
	Fp::addPre(z.a, x.a, y.a);
	Fp::addPre(z.b, x.b, y.b);
}
// Forwards to the fp2_inv routine registered in Fp::op_.
// Only the v_ storage of the a component is handed over for each operand;
// presumably the routine reaches b through the same pointer (a and b stored
// back to back) -- TODO confirm against the Fp2T member layout.
static void inv(Fp2T& y, const Fp2T& x)
{
	Fp::op_.fp2_inv(y.a.v_, x.a.v_);
}
// Forwards to the fp2_mul_xi routine registered in Fp::op_, passing the v_
// storage of the a component of each operand (same calling pattern as inv).
// NOTE(review): a static function pointer with this same name and signature,
// `static void (*mul_xi)(Fp2T& y, const Fp2T& x);`, is declared just above;
// the two cannot coexist in one class, so this member-function form is
// presumably the pre-change version -- confirm which one the file keeps.
static void mul_xi(Fp2T& y, const Fp2T& x)
{
	Fp::op_.fp2_mul_xi(y.a.v_, x.a.v_);
}
static void divBy2(Fp2T& y, const Fp2T& x)
{
Fp::divBy2(y.a, x.a);
@ -405,11 +405,21 @@ public:
sqr = (void (*)(Fp2T& y, const Fp2T& x))op.fp2_sqrA_;
if (sqr == 0) sqr = fp2_sqrC;
op.fp2_inv = fp2_invW;
if (xi_a == 1) {
op.fp2_mul_xi = fp2_mul_xi_1_1i;
} else {
op.fp2_mul_xi = fp2_mul_xiW;
if (op.fp2_mul_xi == 0) {
if (xi_a == 1) {
/*
current fp_generator.hpp generates mul_xi for xi_a = 1
*/
if (op.fp2_mul_xiA_) {
op.fp2_mul_xi = op.fp2_mul_xiA_;
} else {
op.fp2_mul_xi = fp2_mul_xi_1_1i;
}
} else {
op.fp2_mul_xi = fp2_mul_xiW;
}
}
mul_xi = (void (*)(Fp2T&, const Fp2T&))op.fp2_mul_xi;
const Fp2T xi(xi_a, 1);
const mpz_class& p = Fp::getOp().mp;
Fp2T::pow(g[0], xi, (p - 1) / 6); // g = xi^((p-1)/6)
@ -621,6 +631,7 @@ template<class Fp_> void (*Fp2T<Fp_>::sub)(Fp2T& z, const Fp2T& x, const Fp2T& y
// Out-of-class definitions of Fp2T's static function-pointer members.
// No initializer is given here; the targets are assigned by the Fp2T init code.
template<class Fp_> void (*Fp2T<Fp_>::neg)(Fp2T& y, const Fp2T& x);
template<class Fp_> void (*Fp2T<Fp_>::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
template<class Fp_> void (*Fp2T<Fp_>::sqr)(Fp2T& y, const Fp2T& x);
template<class Fp_> void (*Fp2T<Fp_>::mul_xi)(Fp2T& y, const Fp2T& x);
template<class Fp>
struct Fp2DblT {

@ -227,6 +227,7 @@ struct Op {
void4u fp2_mulNF;
void2u fp2_inv;
// Routine that Fp2T::mul_xi is bound to; chosen by the Fp2T init code.
void2u fp2_mul_xi;
// Generated-code version of mul_xi, filled in by the code generator
// (which emits it for xi_a = 1 only); stays 0 when it was not generated.
void2u fp2_mul_xiA_;
uint32_t (*hash)(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize);
PrimeMode primeMode;
@ -306,6 +307,7 @@ struct Op {
fp2_mulNF = 0;
fp2_inv = 0;
fp2_mul_xi = 0;
fp2_mul_xiA_ = 0;
primeMode = PM_GENERIC;
isFullBit = false;

@ -386,6 +386,9 @@ struct Code : Xbyak::CodeGenerator {
align(16);
op.fp2_sqrA_ = getCurr<void2u>();
gen_fp2_sqr4();
align(16);
op.fp2_mul_xiA_ = getCurr<void2u>();
gen_fp2_mul_xi4();
}
}
void gen_addSubPre(bool isAdd, int n)
@ -2873,6 +2876,20 @@ private:
gen_raw_fp_sub(sf.p[0], sf.p[1], sf.p[2], sf.t, false);
gen_raw_fp_sub(sf.p[0] + FpByte_, sf.p[1] + FpByte_, sf.p[2] + FpByte_, sf.t, false);
}
/*
	emit mul_xi for operands of 4 x 8-byte words; valid for xi_a = 1 only
	sf.p[0] is the output y, sf.p[1] is the input x
	(assuming gen_raw_fp_add/sub(z, x, y, ...) emit z = x + y / z = x - y):
	  first half of y  <- x.a - x.b
	  second half of y <- x.a + x.b
*/
void gen_fp2_mul_xi4()
{
	assert(!isFullBit_);
	// 2 arguments, 8 temporary registers, 8 * 4 bytes of stack scratch
	StackFrame sf(this, 2, 8, 8 * 4);
	// The sum is parked in the stack scratch and stored only after the
	// subtraction, because the subtraction still reads both halves of x
	// (presumably so that y may alias x -- TODO confirm).
	gen_raw_fp_add(rsp, sf.p[1], sf.p[1] + FpByte_, sf.t, false);
	gen_raw_fp_sub(sf.p[0], sf.p[1], sf.p[1] + FpByte_, sf.t, false);
	// move the parked sum, one 8-byte word at a time, into the second half of y
	int offset = 0;
	while (offset < 4 * 8) {
		mov(rax, ptr [rsp + offset]);
		mov(ptr [sf.p[0] + FpByte_ + offset], rax);
		offset += 8;
	}
}
void gen_fp2_neg4()
{
assert(!isFullBit_);

Loading…
Cancel
Save