remove cast of function

2merge^2
MITSUNARI Shigeo 4 years ago
commit bacb6573fc
  1. 169
      include/mcl/fp.hpp
  2. 379
      include/mcl/fp_tower.hpp
  3. 1
      include/mcl/gmp_util.hpp
  4. 2
      include/mcl/op.hpp
  5. 17
      include/mcl/util.hpp
  6. 12
      test/common_test.hpp

@ -100,6 +100,47 @@ private:
template<class Fp> friend class FpDblT;
template<class Fp> friend class Fp2T;
template<class Fp> friend struct Fp6T;
#ifdef MCL_XBYAK_DIRECT_CALL
static inline void addA(Unit *z, const Unit *x, const Unit *y)
{
op_.fp_add(z, x, y, op_.p);
}
static inline void subA(Unit *z, const Unit *x, const Unit *y)
{
op_.fp_sub(z, x, y, op_.p);
}
static inline void negA(Unit *y, const Unit *x)
{
op_.fp_neg(y, x, op_.p);
}
static inline void mulA(Unit *z, const Unit *x, const Unit *y)
{
op_.fp_mul(z, x, y, op_.p);
}
static inline void sqrA(Unit *y, const Unit *x)
{
op_.fp_sqr(y, x, op_.p);
}
static inline void mul2A(Unit *y, const Unit *x)
{
op_.fp_mul2(y, x, op_.p);
}
#endif
static inline void mul9A(Unit *y, const Unit *x)
{
mulSmall(y, x, 9);
// op_.fp_mul9(y, x, op_.p);
}
static inline void mulSmall(Unit *z, const Unit *x, const uint32_t y)
{
assert(y <= op_.smallModp.maxMulN);
Unit xy[maxSize + 1];
op_.fp_mulUnitPre(xy, x, y);
int v = op_.smallModp.approxMul(xy);
const Unit *pv = op_.smallModp.getPmul(v);
op_.fp_subPre(z, xy, pv);
op_.fp_sub(z, z, op_.p, op_.p);
}
public:
typedef FpT<tag, maxBitSize> BaseFp;
// return pointer to array v_[]
@ -146,20 +187,27 @@ public:
ioMode_ = 0;
isETHserialization_ = false;
#ifdef MCL_XBYAK_DIRECT_CALL
add = fp::func_ptr_cast<void (*)(FpT& z, const FpT& x, const FpT& y)>(op_.fp_addA_);
if (add == 0) add = addC;
sub = fp::func_ptr_cast<void (*)(FpT& z, const FpT& x, const FpT& y)>(op_.fp_subA_);
if (sub == 0) sub = subC;
neg = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_negA_);
if (neg == 0) neg = negC;
mul = fp::func_ptr_cast<void (*)(FpT& z, const FpT& x, const FpT& y)>(op_.fp_mulA_);
if (mul == 0) mul = mulC;
sqr = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_sqrA_);
if (sqr == 0) sqr = sqrC;
mul2 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul2A_);
if (mul2 == 0) mul2 = mul2C;
mul9 = fp::func_ptr_cast<void (*)(FpT& y, const FpT& x)>(op_.fp_mul9A_);
if (mul9 == 0) mul9 = mul9C;
if (op_.fp_addA_ == 0) {
op_.fp_addA_ = addA;
}
if (op_.fp_subA_ == 0) {
op_.fp_subA_ = subA;
}
if (op_.fp_negA_ == 0) {
op_.fp_negA_ = negA;
}
if (op_.fp_mulA_ == 0) {
op_.fp_mulA_ = mulA;
}
if (op_.fp_sqrA_ == 0) {
op_.fp_sqrA_ = sqrA;
}
if (op_.fp_mul2A_ == 0) {
op_.fp_mul2A_ = mul2A;
}
if (op_.fp_mul9A_ == 0) {
op_.fp_mul9A_ = mul9A;
}
#endif
*pb = true;
}
@ -227,7 +275,7 @@ public:
} else {
clear();
if (x) {
int64_t y = x < 0 ? -x : x;
uint64_t y = fp::abs_(x);
if (sizeof(Unit) == 8) {
v_[0] = y;
} else {
@ -518,41 +566,67 @@ public:
}
setArray(pb, gmp::getUnit(x), gmp::getUnitSize(x));
}
static void add(FpT& z, const FpT& x, const FpT& y)
{
#ifdef MCL_XBYAK_DIRECT_CALL
static void (*add)(FpT& z, const FpT& x, const FpT& y);
static inline void addC(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
static void (*sub)(FpT& z, const FpT& x, const FpT& y);
static inline void subC(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); }
static void (*neg)(FpT& y, const FpT& x);
static inline void negC(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_, op_.p); }
static void (*mul)(FpT& z, const FpT& x, const FpT& y);
static inline void mulC(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
static void (*sqr)(FpT& y, const FpT& x);
static inline void sqrC(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
static void (*mul2)(FpT& y, const FpT& x);
static inline void mul2C(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
static void (*mul9)(FpT& y, const FpT& x);
static inline void mul9C(FpT& y, const FpT& x) { mulSmall(y, x, 9); }
op_.fp_addA_(z.v_, x.v_, y.v_);
#else
static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_, op_.p); }
static inline void neg(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_, op_.p); }
static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_, op_.p); }
static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_, op_.p); }
static inline void mul2(FpT& y, const FpT& x) { op_.fp_mul2(y.v_, x.v_, op_.p); }
static inline void mul9(FpT& y, const FpT& x) { mulSmall(y, x, 9); }
op_.fp_add(z.v_, x.v_, y.v_, op_.p);
#endif
}
static void sub(FpT& z, const FpT& x, const FpT& y)
{
#ifdef MCL_XBYAK_DIRECT_CALL
op_.fp_subA_(z.v_, x.v_, y.v_);
#else
op_.fp_sub(z.v_, x.v_, y.v_, op_.p);
#endif
}
static void neg(FpT& y, const FpT& x)
{
#ifdef MCL_XBYAK_DIRECT_CALL
op_.fp_negA_(y.v_, x.v_);
#else
op_.fp_neg(y.v_, x.v_, op_.p);
#endif
}
static void mul(FpT& z, const FpT& x, const FpT& y)
{
#ifdef MCL_XBYAK_DIRECT_CALL
op_.fp_mulA_(z.v_, x.v_, y.v_);
#else
op_.fp_mul(z.v_, x.v_, y.v_, op_.p);
#endif
}
static void sqr(FpT& y, const FpT& x)
{
#ifdef MCL_XBYAK_DIRECT_CALL
op_.fp_sqrA_(y.v_, x.v_);
#else
op_.fp_sqr(y.v_, x.v_, op_.p);
#endif
}
static void mul2(FpT& y, const FpT& x)
{
#ifdef MCL_XBYAK_DIRECT_CALL
op_.fp_mul2A_(y.v_, x.v_);
#else
op_.fp_mul2(y.v_, x.v_, op_.p);
#endif
}
static void mul9(FpT& y, const FpT& x)
{
#ifdef MCL_XBYAK_DIRECT_CALL
op_.fp_mul9A_(y.v_, x.v_);
#else
mul9A(y.v_, x.v_);
#endif
}
static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); }
static inline void subPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_subPre(z.v_, x.v_, y.v_); }
static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y)
{
assert(y <= op_.smallModp.maxMulN);
Unit xy[maxSize + 1];
op_.fp_mulUnitPre(xy, x.v_, y);
int v = op_.smallModp.approxMul(xy);
const Unit *pv = op_.smallModp.getPmul(v);
op_.fp_subPre(z.v_, xy, pv);
op_.fp_sub(z.v_, z.v_, op_.p, op_.p);
mulSmall(z.v_, x.v_, y);
}
static inline void mulUnit(FpT& z, const FpT& x, const Unit y)
{
@ -788,15 +862,6 @@ template<class tag, size_t maxBitSize> fp::Op FpT<tag, maxBitSize>::op_;
template<class tag, size_t maxBitSize> FpT<tag, maxBitSize> FpT<tag, maxBitSize>::inv2_;
template<class tag, size_t maxBitSize> int FpT<tag, maxBitSize>::ioMode_ = IoAuto;
template<class tag, size_t maxBitSize> bool FpT<tag, maxBitSize>::isETHserialization_ = false;
#ifdef MCL_XBYAK_DIRECT_CALL
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::add)(FpT& z, const FpT& x, const FpT& y);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sub)(FpT& z, const FpT& x, const FpT& y);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::neg)(FpT& y, const FpT& x);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul)(FpT& z, const FpT& x, const FpT& y);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::sqr)(FpT& y, const FpT& x);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul2)(FpT& y, const FpT& x);
template<class tag, size_t maxBitSize> void (*FpT<tag, maxBitSize>::mul9)(FpT& y, const FpT& x);
#endif
} // mcl

@ -113,37 +113,42 @@ public:
{
gmp::setArray(pb, x, v_, Fp::op_.N * 2);
}
static inline void add(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
#ifdef MCL_XBYAK_DIRECT_CALL
static void (*add)(FpDblT& z, const FpDblT& x, const FpDblT& y);
static void (*sub)(FpDblT& z, const FpDblT& x, const FpDblT& y);
static void (*mod)(Fp& z, const FpDblT& xy);
static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
Fp::op_.fpDbl_addA_(z.v_, x.v_, y.v_);
#else
static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p);
#endif
/*
mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
*/
static void mulPre(FpDblT& xy, const Fp& x, const Fp& y)
}
static inline void sub(FpDblT& z, const FpDblT& x, const FpDblT& y)
{
const mcl::fp::Op& op = Fp::getOp();
op.fpDbl_mulPre(xy.v_, x.v_, y.v_);
#ifdef MCL_XBYAK_DIRECT_CALL
Fp::op_.fpDbl_subA_(z.v_, x.v_, y.v_);
#else
Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p);
#endif
}
static void sqrPre(FpDblT& xx, const Fp& x)
static inline void mod(Fp& z, const FpDblT& xy)
{
const mcl::fp::Op& op = Fp::getOp();
op.fpDbl_sqrPre(xx.v_, x.v_);
#ifdef MCL_XBYAK_DIRECT_CALL
Fp::op_.fpDbl_modA_(z.v_, xy.v_);
#else
Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p);
#endif
}
#ifdef MCL_XBYAK_DIRECT_CALL
static void addA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_add(z, x, y, Fp::op_.p); }
static void subA(Unit *z, const Unit *x, const Unit *y) { Fp::op_.fpDbl_sub(z, x, y, Fp::op_.p); }
static void modA(Unit *z, const Unit *xy) { Fp::op_.fpDbl_mod(z, xy, Fp::op_.p); }
#endif
static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
/*
mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
*/
static void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
static void sqrPre(FpDblT& xx, const Fp& x) { Fp::op_.fpDbl_sqrPre(xx.v_, x.v_); }
static void mulUnit(FpDblT& z, const FpDblT& x, Unit y)
{
if (mulSmallUnit(z, x, y)) return;
@ -152,31 +157,22 @@ public:
static void init()
{
#ifdef MCL_XBYAK_DIRECT_CALL
const mcl::fp::Op& op = Fp::getOp();
add = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_addA_);
if (add == 0) add = addC;
sub = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subA_);
if (sub == 0) sub = subC;
mod = fp::func_ptr_cast<void (*)(Fp&, const FpDblT&)>(op.fpDbl_modA_);
if (mod == 0) mod = modC;
addPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_addPre);
if (addPre == 0) addPre = addPreC;
subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre);
if (subPre == 0) subPre = subPreC;
mcl::fp::Op& op = Fp::op_;
if (op.fpDbl_addA_ == 0) {
op.fpDbl_addA_ = addA;
}
if (op.fpDbl_subA_ == 0) {
op.fpDbl_subA_ = subA;
}
if (op.fpDbl_modA_ == 0) {
op.fpDbl_modA_ = modA;
}
#endif
}
void operator+=(const FpDblT& x) { add(*this, *this, x); }
void operator-=(const FpDblT& x) { sub(*this, *this, x); }
};
#ifdef MCL_XBYAK_DIRECT_CALL
template<class Fp> void (*FpDblT<Fp>::add)(FpDblT&, const FpDblT&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::sub)(FpDblT&, const FpDblT&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::mod)(Fp&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::addPre)(FpDblT&, const FpDblT&, const FpDblT&);
template<class Fp> void (*FpDblT<Fp>::subPre)(FpDblT&, const FpDblT&, const FpDblT&);
#endif
/*
beta = -1
Fp2 = F[i] / (i^2 + 1)
@ -227,24 +223,79 @@ public:
a = a_;
b = b_;
}
static void add(Fp2T& z, const Fp2T& x, const Fp2T& y)
{
#ifdef MCL_XBYAK_DIRECT_CALL
static void (*add)(Fp2T& z, const Fp2T& x, const Fp2T& y);
static void (*sub)(Fp2T& z, const Fp2T& x, const Fp2T& y);
static void (*neg)(Fp2T& y, const Fp2T& x);
static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
static void (*sqr)(Fp2T& y, const Fp2T& x);
static void (*mul2)(Fp2T& y, const Fp2T& x);
Fp::op_.fp2_addA_(z.a.v_, x.a.v_, y.a.v_);
#else
static void add(Fp2T& z, const Fp2T& x, const Fp2T& y) { addC(z, x, y); }
static void sub(Fp2T& z, const Fp2T& x, const Fp2T& y) { subC(z, x, y); }
static void neg(Fp2T& y, const Fp2T& x) { negC(y, x); }
static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) { mulC(z, x, y); }
static void sqr(Fp2T& y, const Fp2T& x) { sqrC(y, x); }
static void mul2(Fp2T& y, const Fp2T& x) { mul2C(y, x); }
addA(z.a.v_, x.a.v_, y.a.v_);
#endif
static void (*mul_xi)(Fp2T& y, const Fp2T& x);
static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); }
static void inv(Fp2T& y, const Fp2T& x) { Fp::op_.fp2_inv(y.a.v_, x.a.v_); }
}
static void sub(Fp2T& z, const Fp2T& x, const Fp2T& y)
{
#ifdef MCL_XBYAK_DIRECT_CALL
Fp::op_.fp2_subA_(z.a.v_, x.a.v_, y.a.v_);
#else
subA(z.a.v_, x.a.v_, y.a.v_);
#endif
}
static void neg(Fp2T& y, const Fp2T& x)
{
#ifdef MCL_XBYAK_DIRECT_CALL
Fp::op_.fp2_negA_(y.a.v_, x.a.v_);
#else
negA(y.a.v_, x.a.v_);
#endif
}
static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y)
{
#ifdef MCL_XBYAK_DIRECT_CALL
Fp::op_.fp2_mulA_(z.a.v_, x.a.v_, y.a.v_);
#else
mulA(z.a.v_, x.a.v_, y.a.v_);
#endif
}
static void sqr(Fp2T& y, const Fp2T& x)
{
#ifdef MCL_XBYAK_DIRECT_CALL
Fp::op_.fp2_sqrA_(y.a.v_, x.a.v_);
#else
sqrA(y.a.v_, x.a.v_);
#endif
}
static void mul2(Fp2T& y, const Fp2T& x)
{
#ifdef MCL_XBYAK_DIRECT_CALL
Fp::op_.fp2_mul2A_(y.a.v_, x.a.v_);
#else
mul2A(y.a.v_, x.a.v_);
#endif
}
static void mul_xi(Fp2T& y, const Fp2T& x)
{
Fp::op_.fp2_mul_xiA_(y.a.v_, x.a.v_);
}
/*
x = a + bi
1 / x = (a - bi) / (a^2 + b^2)
*/
static void inv(Fp2T& y, const Fp2T& x)
{
assert(!x.isZero());
const Fp& a = x.a;
const Fp& b = x.b;
Fp r;
norm(r, x);
Fp::inv(r, r); // r = 1 / (a^2 + b^2)
Fp::mul(y.a, a, r);
Fp::mul(y.b, b, r);
Fp::neg(y.b, y.b);
}
static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y)
{
Fp::addPre(z.a, x.a, y.a);
Fp::addPre(z.b, x.b, y.b);
}
static void divBy2(Fp2T& y, const Fp2T& x)
{
Fp::divBy2(y.a, x.a);
@ -349,12 +400,14 @@ public:
Fp::mul(y.b, x.b, t2);
return true;
}
// y = a^2 + b^2
static void inline norm(Fp& y, const Fp2T& x)
{
Fp aa, bb;
Fp::sqr(aa, x.a);
Fp::sqr(bb, x.b);
Fp::add(y, aa, bb);
FpDbl AA, BB;
FpDbl::sqrPre(AA, x.a);
FpDbl::sqrPre(BB, x.b);
FpDbl::addPre(AA, AA, BB);
FpDbl::mod(y, AA);
}
/*
Frobenius
@ -380,7 +433,6 @@ public:
static uint32_t get_xi_a() { return Fp::getOp().xi_a; }
static void init(bool *pb)
{
// assert(Fp::maxSize <= 256);
mcl::fp::Op& op = Fp::op_;
assert(op.xi_a);
// assume p < W/4 where W = 1 << (N * sizeof(Unit) * 8)
@ -388,28 +440,31 @@ public:
*pb = false;
return;
}
mul_xi = 0;
#ifdef MCL_XBYAK_DIRECT_CALL
add = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_addA_);
if (add == 0) add = addC;
sub = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_subA_);
if (sub == 0) sub = subC;
neg = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_negA_);
if (neg == 0) neg = negC;
mul = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_mulA_);
if (mul == 0) mul = mulC;
sqr = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_sqrA_);
if (sqr == 0) sqr = sqrC;
mul2 = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_mul2A_);
if (mul2 == 0) mul2 = mul2C;
mul_xi = fp::func_ptr_cast<void (*)(Fp2T&, const Fp2T&)>(op.fp2_mul_xiA_);
if (op.fp2_addA_ == 0) {
op.fp2_addA_ = addA;
}
if (op.fp2_subA_ == 0) {
op.fp2_subA_ = subA;
}
if (op.fp2_negA_ == 0) {
op.fp2_negA_ = negA;
}
if (op.fp2_mulA_ == 0) {
op.fp2_mulA_ = mulA;
}
if (op.fp2_sqrA_ == 0) {
op.fp2_sqrA_ = sqrA;
}
if (op.fp2_mul2A_ == 0) {
op.fp2_mul2A_ = mul2A;
}
#endif
op.fp2_inv = fp2_invW;
if (mul_xi == 0) {
if (op.fp2_mul_xiA_ == 0) {
if (op.xi_a == 1) {
mul_xi = fp2_mul_xi_1_1iC;
op.fp2_mul_xiA_ = fp2_mul_xi_1_1iA;
} else {
mul_xi = fp2_mul_xiC;
op.fp2_mul_xiA_ = fp2_mul_xiA;
}
}
FpDblT<Fp>::init();
@ -442,6 +497,7 @@ public:
Fp2T::mul(g2[i], t, g[i]);
g3[i] = g[i] * g2[i];
}
*pb = true;
}
#ifndef CYBOZU_DONT_USE_EXCEPTION
static void init()
@ -486,43 +542,60 @@ public:
}
#endif
private:
static Fp2T& cast(Unit *x) { return *reinterpret_cast<Fp2T*>(x); }
static const Fp2T& cast(const Unit *x) { return *reinterpret_cast<const Fp2T*>(x); }
/*
default Fp2T operator
Fp2T = Fp[i]/(i^2 + 1)
*/
static void addC(Fp2T& z, const Fp2T& x, const Fp2T& y)
static void addA(Unit *pz, const Unit *px, const Unit *py)
{
Fp2T& z = cast(pz);
const Fp2T& x = cast(px);
const Fp2T& y = cast(py);
Fp::add(z.a, x.a, y.a);
Fp::add(z.b, x.b, y.b);
}
static void subC(Fp2T& z, const Fp2T& x, const Fp2T& y)
static void subA(Unit *pz, const Unit *px, const Unit *py)
{
Fp2T& z = cast(pz);
const Fp2T& x = cast(px);
const Fp2T& y = cast(py);
Fp::sub(z.a, x.a, y.a);
Fp::sub(z.b, x.b, y.b);
}
static void negC(Fp2T& y, const Fp2T& x)
static void negA(Unit *py, const Unit *px)
{
Fp2T& y = cast(py);
const Fp2T& x = cast(px);
Fp::neg(y.a, x.a);
Fp::neg(y.b, x.b);
}
static void mul2C(Fp2T& y, const Fp2T& x)
{
Fp::mul2(y.a, x.a);
Fp::mul2(y.b, x.b);
}
static void mulC(Fp2T& z, const Fp2T& x, const Fp2T& y)
static void mulA(Unit *pz, const Unit *px, const Unit *py)
{
Fp2T& z = cast(pz);
const Fp2T& x = cast(px);
const Fp2T& y = cast(py);
Fp2Dbl d;
Fp2Dbl::mulPre(d, x, y);
FpDbl::mod(z.a, d.a);
FpDbl::mod(z.b, d.b);
}
static void mul2A(Unit *py, const Unit *px)
{
Fp2T& y = cast(py);
const Fp2T& x = cast(px);
Fp::mul2(y.a, x.a);
Fp::mul2(y.b, x.b);
}
/*
x = a + bi, i^2 = -1
y = x^2 = (a + bi)^2 = (a + b)(a - b) + 2abi
*/
static void sqrC(Fp2T& y, const Fp2T& x)
static void sqrA(Unit *py, const Unit *px)
{
Fp2T& y = cast(py);
const Fp2T& x = cast(px);
const Fp& a = x.a;
const Fp& b = x.b;
#if 1 // faster than using FpDbl
@ -551,8 +624,10 @@ private:
y = (a + bi)xi = (a + bi)(xi_a + i)
=(a * x_ia - b) + (a + b xi_a)i
*/
static void fp2_mul_xiC(Fp2T& y, const Fp2T& x)
static void fp2_mul_xiA(Unit *py, const Unit *px)
{
Fp2T& y = cast(py);
const Fp2T& x = cast(px);
const Fp& a = x.a;
const Fp& b = x.b;
Fp t;
@ -566,8 +641,10 @@ private:
xi = 1 + i ; xi_a = 1
y = (a + bi)xi = (a - b) + (a + b)i
*/
static void fp2_mul_xi_1_1iC(Fp2T& y, const Fp2T& x)
static void fp2_mul_xi_1_1iA(Unit *py, const Unit *px)
{
Fp2T& y = cast(py);
const Fp2T& x = cast(px);
const Fp& a = x.a;
const Fp& b = x.b;
Fp t;
@ -575,39 +652,11 @@ private:
Fp::sub(y.a, a, b);
y.b = t;
}
/*
x = a + bi
1 / x = (a - bi) / (a^2 + b^2)
*/
static void fp2_invW(Unit *y, const Unit *x)
{
const Fp *px = reinterpret_cast<const Fp*>(x);
Fp *py = reinterpret_cast<Fp*>(y);
const Fp& a = px[0];
const Fp& b = px[1];
Fp aa, bb;
Fp::sqr(aa, a);
Fp::sqr(bb, b);
aa += bb;
Fp::inv(aa, aa); // aa = 1 / (a^2 + b^2)
Fp::mul(py[0], a, aa);
Fp::mul(py[1], b, aa);
Fp::neg(py[1], py[1]);
}
};
#ifdef MCL_XBYAK_DIRECT_CALL
template<class Fp_> void (*Fp2T<Fp_>::add)(Fp2T& z, const Fp2T& x, const Fp2T& y);
template<class Fp_> void (*Fp2T<Fp_>::sub)(Fp2T& z, const Fp2T& x, const Fp2T& y);
template<class Fp_> void (*Fp2T<Fp_>::neg)(Fp2T& y, const Fp2T& x);
template<class Fp_> void (*Fp2T<Fp_>::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
template<class Fp_> void (*Fp2T<Fp_>::sqr)(Fp2T& y, const Fp2T& x);
template<class Fp_> void (*Fp2T<Fp_>::mul2)(Fp2T& y, const Fp2T& x);
#endif
template<class Fp_> void (*Fp2T<Fp_>::mul_xi)(Fp2T& y, const Fp2T& x);
template<class Fp>
struct Fp2DblT {
typedef Fp2DblT<Fp> Fp2Dbl;
typedef FpDblT<Fp> FpDbl;
typedef Fp2T<Fp> Fp2;
typedef fp::Unit Unit;
@ -646,30 +695,18 @@ struct Fp2DblT {
FpDbl::neg(y.a, x.a);
FpDbl::neg(y.b, x.b);
}
static void mul_xi_1C(Fp2DblT& y, const Fp2DblT& x)
static void mulPre(Fp2DblT& z, const Fp2& x, const Fp2& y)
{
FpDbl t;
FpDbl::add(t, x.a, x.b);
FpDbl::sub(y.a, x.a, x.b);
y.b = t;
Fp::getOp().fp2Dbl_mulPreA_(z.a.v_, x.getUnit(), y.getUnit());
}
static void mul_xi_genericC(Fp2DblT& y, const Fp2DblT& x)
static void sqrPre(Fp2DblT& y, const Fp2& x)
{
const uint32_t xi_a = Fp2::get_xi_a();
FpDbl t;
FpDbl::mulUnit(t, x.a, xi_a);
FpDbl::sub(t, t, x.b);
FpDbl::mulUnit(y.b, x.b, xi_a);
FpDbl::add(y.b, y.b, x.a);
y.a = t;
Fp::getOp().fp2Dbl_sqrPreA_(y.a.v_, x.getUnit());
}
static void (*mulPre)(Fp2DblT&, const Fp2&, const Fp2&);
static void sqrPre(Fp2DblT& y, const Fp2& x)
static void mul_xi(Fp2DblT& y, const Fp2DblT& x)
{
const mcl::fp::Op& op = Fp::getOp();
op.fp2Dbl_sqrPreA_(y.a.v_, x.getUnit());
Fp::getOp().fp2Dbl_mul_xiA_(y.a.v_, x.a.getUnit());
}
static void (*mul_xi)(Fp2DblT&, const Fp2DblT&);
static void mod(Fp2& y, const Fp2DblT& x)
{
FpDbl::mod(y.a, x.a);
@ -687,33 +724,35 @@ struct Fp2DblT {
{
assert(!Fp::getOp().isFullBit);
mcl::fp::Op& op = Fp::getOpNonConst();
if (op.fp2Dbl_mulPreA_) {
mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_);
} else {
mulPre = fp2Dbl_mulPreW;
if (op.fp2Dbl_mulPreA_ == 0) {
op.fp2Dbl_mulPreA_ = mulPreA;
}
if (op.fp2Dbl_sqrPreA_ == 0) {
op.fp2Dbl_sqrPreA_ = fp2Dbl_sqrPreC;
op.fp2Dbl_sqrPreA_ = sqrPreA;
}
if (op.fp2Dbl_mul_xiA_ == 0) {
const uint32_t xi_a = Fp2::get_xi_a();
switch (xi_a) {
case 1:
mul_xi = mul_xi_1C;
if (op.fp2Dbl_mul_xiA_) {
mul_xi = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2DblT&)>(op.fp2Dbl_mul_xiA_);
if (xi_a == 1) {
op.fp2Dbl_mul_xiA_ = mul_xi_1A;
} else {
op.fp2Dbl_mul_xiA_ = mul_xi_genericA;
}
break;
default:
mul_xi = mul_xi_genericC;
break;
}
}
private:
static Fp2 cast(Unit *x) { return *reinterpret_cast<Fp2*>(x); }
static const Fp2 cast(const Unit *x) { return *reinterpret_cast<const Fp2*>(x); }
static Fp2Dbl& castD(Unit *x) { return *reinterpret_cast<Fp2Dbl*>(x); }
static const Fp2Dbl& castD(const Unit *x) { return *reinterpret_cast<const Fp2Dbl*>(x); }
/*
Fp2Dbl::mulPre by FpDblT
@note mod of NIST_P192 is fast
*/
static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y)
static void mulPreA(Unit *pz, const Unit *px, const Unit *py)
{
Fp2Dbl& z = castD(pz);
const Fp2& x = cast(px);
const Fp2& y = cast(py);
assert(!Fp::getOp().isFullBit);
const Fp& a = x.a;
const Fp& b = x.b;
@ -732,11 +771,11 @@ struct Fp2DblT {
FpDbl::subPre(d1, d1, d2);
FpDbl::sub(d0, d0, d2); // ac - bd
}
static void fp2Dbl_sqrPreC(Unit *py, const Unit *px)
static void sqrPreA(Unit *py, const Unit *px)
{
assert(!Fp::getOp().isFullBit);
const Fp2& x = *reinterpret_cast<const Fp2*>(px);
Fp2DblT& y = *reinterpret_cast<Fp2DblT*>(py);
Fp2Dbl& y = castD(py);
const Fp2& x = cast(px);
Fp t1, t2;
Fp::addPre(t1, x.b, x.b); // 2b
Fp::addPre(t2, x.a, x.b); // a + b
@ -744,11 +783,29 @@ struct Fp2DblT {
Fp::sub(t1, x.a, x.b); // a - b
FpDbl::mulPre(y.a, t1, t2); // (a + b)(a - b)
}
static void mul_xi_1A(Unit *py, const Unit *px)
{
Fp2Dbl& y = castD(py);
const Fp2Dbl& x = castD(px);
FpDbl t;
FpDbl::add(t, x.a, x.b);
FpDbl::sub(y.a, x.a, x.b);
y.b = t;
}
static void mul_xi_genericA(Unit *py, const Unit *px)
{
const uint32_t xi_a = Fp2::get_xi_a();
Fp2Dbl& y = castD(py);
const Fp2Dbl& x = castD(px);
FpDbl t;
FpDbl::mulUnit(t, x.a, xi_a);
FpDbl::sub(t, t, x.b);
FpDbl::mulUnit(y.b, x.b, xi_a);
FpDbl::add(y.b, y.b, x.a);
y.a = t;
}
};
template<class Fp> void (*Fp2DblT<Fp>::mulPre)(Fp2DblT&, const Fp2T<Fp>&, const Fp2T<Fp>&);
template<class Fp> void (*Fp2DblT<Fp>::mul_xi)(Fp2DblT<Fp>&, const Fp2DblT<Fp>&);
template<class Fp> Fp2T<Fp> Fp2T<Fp>::g[Fp2T<Fp>::gN];
template<class Fp> Fp2T<Fp> Fp2T<Fp>::g2[Fp2T<Fp>::gN];
template<class Fp> Fp2T<Fp> Fp2T<Fp>::g3[Fp2T<Fp>::gN];

@ -960,6 +960,7 @@ struct SmallModp {
}
uint32_t getTop(const Unit *x) const
{
if (shiftR_ == 0) return x[N_ - 1];
return (x[N_ - 1] >> shiftR_) | (x[N_] << shiftL_);
}
uint32_t cvtInt(const mpz_class& x) const

@ -258,7 +258,6 @@ struct Op {
*/
int xi_a; // xi = xi_a + u
void4u fp2_mulNF;
void2u fp2_inv;
void2u fp2_mul_xiA_;
uint32_t (*hash)(void *out, uint32_t maxOutSize, const void *msg, uint32_t msgSize);
@ -345,7 +344,6 @@ struct Op {
xi_a = 0;
fp2_mulNF = 0;
fp2_inv = 0;
fp2_mul_xiA_ = 0;
hash = 0;

@ -17,8 +17,21 @@
namespace mcl { namespace fp {
// some environments do not have utility
template<class T>
T abs_(T x) { return x < 0 ? -x : x; }
inline uint32_t abs_(int32_t x)
{
if (x >= 0) return uint32_t(x);
// avoid undefined behavior
if (x == -2147483647 - 1) return 2147483648u;
return uint32_t(-x);
}
inline uint64_t abs_(int64_t x)
{
if (x >= 0) return uint64_t(x);
// avoid undefined behavior
if (x == -9223372036854775807ll - 1) return 9223372036854775808ull;
return uint64_t(-x);
}
template<class T>
T min_(T x, T y) { return x < y ? x : y; }

@ -163,10 +163,11 @@ void testABCD()
void testFp2Dbl_mul_xi1()
{
if (Fp2::get_xi_a() != 1) return;
const uint32_t xi_a = Fp2::get_xi_a();
if (xi_a != 1) return;
puts("testFp2Dbl_mul_xi1");
cybozu::XorShift rg;
for (int i = 0; i < 100; i++) {
for (int i = 0; i < 10; i++) {
Fp a1, a2;
a1.setByCSPRNG(rg);
a2.setByCSPRNG(rg);
@ -176,7 +177,12 @@ void testFp2Dbl_mul_xi1()
a2.setByCSPRNG(rg);
FpDbl::mulPre(x.b, a1, a2);
Fp2Dbl ok;
Fp2Dbl::mul_xi_1C(ok, x);
{
FpDbl::mulUnit(ok.a, x.a, xi_a);
ok.a -= x.b;
FpDbl::mulUnit(ok.b, x.b, xi_a);
ok.b += x.a;
}
Fp2Dbl::mul_xi(x, x);
CYBOZU_TEST_EQUAL_ARRAY(ok.a.getUnit(), x.a.getUnit(), ok.a.getUnitSize());
CYBOZU_TEST_EQUAL_ARRAY(ok.b.getUnit(), x.b.getUnit(), ok.b.getUnitSize());

Loading…
Cancel
Save