refactoring setup

dev
MITSUNARI Shigeo 8 years ago
parent 079617adf0
commit cd1257d109
  1. 6
      sample/large.cpp
  2. 16
      sample/rawbench.cpp
  3. 227
      src/fp.cpp
  4. 130
      src/fp_proto.hpp
  5. 81
      src/gen.cpp
  6. 8
      src/low_gmp.hpp

@ -108,11 +108,11 @@ void test(const std::string& pStr, mcl::fp::Mode mode)
}
CYBOZU_BENCH("mulPre", op.fpDbl_mulPre, ux, ux, uy);
CYBOZU_BENCH("sqrPre", op.fpDbl_sqrPre, ux, ux);
CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux);
CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux);
CYBOZU_BENCH("add", op.fpDbl_add, ux, ux, ux, op.p);
CYBOZU_BENCH("sub", op.fpDbl_sub, ux, ux, ux, op.p);
CYBOZU_BENCH("addNC", op.fpDbl_addNC, ux, ux, ux);
CYBOZU_BENCH("subNC", op.fpDbl_subNC, ux, ux, ux);
CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux);
CYBOZU_BENCH("mont", op.fpDbl_mod, ux, ux, op.p);
CYBOZU_BENCH("mul", Fp::mul, x, x, x);
compareGmp(pStr);
}

@ -36,19 +36,19 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
double fpDbl_addT, fpDbl_subT;
double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT;
double fp2_sqrT, fp2_mulT;
CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy);
CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux);
CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy, op.p);
CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux, op.p);
CYBOZU_BENCH_T(fp_addNCT, op.fp_addNC, uz, ux, uy);
CYBOZU_BENCH_T(fp_subNCT, op.fp_subNC, uz, uy, ux);
CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux);
CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy);
CYBOZU_BENCH_T(fp_mul_UnitT, op.fp_mul_Unit, uz, ux, 12345678);
CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, uz, ux, op.p);
CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy, op.p);
CYBOZU_BENCH_T(fp_mul_UnitT, op.fp_mul_Unit, uz, ux, 12345678, op.p);
CYBOZU_BENCH_T(fp_mul_UnitPreT, op.fp_mul_UnitPre, ux, ux, 12345678);
CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy);
CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux);
CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy, op.p);
CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux, op.p);
CYBOZU_BENCH_T(fpDbl_sqrPreT, op.fpDbl_sqrPre, uz, ux);
CYBOZU_BENCH_T(fpDbl_mulPreT, op.fpDbl_mulPre, uz, ux, uy);
CYBOZU_BENCH_T(fpDbl_modT, op.fpDbl_mod, uz, ux);
CYBOZU_BENCH_T(fpDbl_modT, op.fpDbl_mod, uz, ux, op.p);
Fp2 f2x, f2y;
f2x.a = fx;
f2x.b = fy;

@ -107,6 +107,70 @@ Mode StrToMode(const std::string& s)
throw cybozu::Exception("StrToMode") << s;
}
#ifdef MCL_USE_LLVM
/*
	MCL_DEF_LLVM_FUNC(bit)
	Defines the Ltag explicit specializations of the static member `f` for
	N = bit / mcl::fp::UnitBitSize, pointing each one at the routine whose name is
	built by token pasting: mcl_fp_<op> ## bit ## L or mcl_fpDbl_<op> ## bit ## L.
	The last line intentionally carries no line continuation, so the macro ends
	there and cannot absorb the declaration that follows it.
*/
#define MCL_DEF_LLVM_FUNC(bit) \
template<>const u3u AddNC<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_addNC ## bit ## L; \
template<>const u3u SubNC<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_subNC ## bit ## L; \
template<>const void3u MulPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_mulPre ## bit ## L; \
template<>const void2u SqrPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_sqrPre ## bit ## L; \
template<>const void2uI Mul_UnitPre<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_mul_UnitPre ## bit ## L; \
template<>const void4u Add<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_add ## bit ## L; \
template<>const void4u Sub<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_sub ## bit ## L; \
template<>const void4u Mont<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_mont ## bit ## L; \
template<>const void3u MontRed<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fp_montRed ## bit ## L; \
template<>const void4u DblAdd<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_add ## bit ## L; \
template<>const void4u DblSub<bit / mcl::fp::UnitBitSize, Ltag>::f = &mcl_fpDbl_sub ## bit ## L;
template<size_t N>
struct Mul<N, Ltag> {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
Unit xy[N * 2];
MulPre<N, Ltag>::f(xy, x, y);
Dbl_Mod<N, Gtag>::f(z, xy, p);
}
static const void4u f;
};
template<size_t N>
const void4u Mul<N, Ltag>::f = Mul<N, Ltag>::func;
template<size_t N>
struct Sqr<N, Ltag> {
static inline void func(Unit *y, const Unit *x, const Unit *p)
{
Unit xx[N * 2];
SqrPre<N, Ltag>::f(xx, x);
Dbl_Mod<N, Gtag>::f(y, xx, p);
}
static const void3u f;
};
template<size_t N>
const void3u Sqr<N, Ltag>::f = Sqr<N, Ltag>::func;
// Expand MCL_DEF_LLVM_FUNC once per bit width; these widths are expanded unconditionally.
MCL_DEF_LLVM_FUNC(64)
MCL_DEF_LLVM_FUNC(128)
MCL_DEF_LLVM_FUNC(192)
MCL_DEF_LLVM_FUNC(256)
MCL_DEF_LLVM_FUNC(320)
MCL_DEF_LLVM_FUNC(384)
MCL_DEF_LLVM_FUNC(448)
MCL_DEF_LLVM_FUNC(512)
#if CYBOZU_OS_BIT == 32
// Widths that are a multiple of 32 but not of 64; expanded only when CYBOZU_OS_BIT == 32.
// NOTE(review): the size switch in Op::init also has cases for 32 and 96 on this path,
// but no MCL_DEF_LLVM_FUNC(32) / MCL_DEF_LLVM_FUNC(96) appears here - confirm that is intended.
MCL_DEF_LLVM_FUNC(160)
MCL_DEF_LLVM_FUNC(224)
MCL_DEF_LLVM_FUNC(288)
MCL_DEF_LLVM_FUNC(352)
MCL_DEF_LLVM_FUNC(416)
MCL_DEF_LLVM_FUNC(480)
MCL_DEF_LLVM_FUNC(544)
#else
// 576 is expanded only when CYBOZU_OS_BIT is not 32.
MCL_DEF_LLVM_FUNC(576)
#endif
#endif
template<size_t bitSize>
struct OpeFunc {
@ -136,43 +200,6 @@ struct OpeFunc {
{
copyArray(y, x, N);
}
// z[N] <- (x[N] + y[N]) mod p[N]
static inline void fp_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
	const Unit carry = AddPre<N, Gtag>::f(z, x, y);
	if (carry != 0) {
		// carry out of the top unit: subtract p once, in place
		SubPre<N, Gtag>::f(z, z, p);
	} else {
		// try z - p in a scratch buffer and keep it only if it did not borrow
		Unit reduced[N];
		const Unit borrow = SubPre<N, Gtag>::f(reduced, z, p);
		if (borrow == 0) {
			memcpy(z, reduced, sizeof(reduced));
		}
	}
}
// z[N] <- (x[N] - y[N]) mod p[N]
static inline void fp_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
	const Unit borrow = SubPre<N, Gtag>::f(z, x, y);
	if (borrow == 0) return; // no borrow: z already holds x - y
	// the subtraction wrapped around, so add p back once
	AddPre<N, Gtag>::f(z, z, p);
}
/*
	z[N * 2] <- x[N * 2] + y[N * 2] mod p[N] << (N * UnitBitSize)
	Only the upper N units of z are corrected by p.
*/
static inline void fpDbl_addC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
	Unit *const upper = z + N;
	const Unit carry = AddPre<N * 2, Gtag>::f(z, x, y);
	if (carry != 0) {
		// carry out of the 2N-unit sum: subtract p from the upper half in place
		SubPre<N, Gtag>::f(upper, upper, p);
	} else {
		// keep upper - p only when that subtraction does not borrow
		Unit reduced[N];
		const Unit borrow = SubPre<N, Gtag>::f(reduced, upper, p);
		if (borrow == 0) {
			memcpy(upper, reduced, sizeof(reduced));
		}
	}
}
// z[N * 2] <- x[N * 2] - y[N * 2]; on borrow, p is added back to the upper N units
static inline void fpDbl_subC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
	const Unit borrow = SubPre<N * 2, Gtag>::f(z, x, y);
	if (borrow == 0) return; // no borrow: nothing to correct
	Unit *const upper = z + N;
	AddPre<N, Gtag>::f(upper, upper, p);
}
// z[N] <- mont(x[N], y[N])
static inline void fp_mulMontC(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
@ -189,20 +216,20 @@ struct OpeFunc {
Unit t[N + 2];
Mul_UnitPre<N, Gtag>::f(t, p, q); // p * q
t[N + 1] = 0; // always zero
c[N + 1] = AddPre<N + 1, Gtag>::f(c, c, t);
c[N + 1] = AddNC<N + 1, Gtag>::f(c, c, t);
c++;
for (size_t i = 1; i < N; i++) {
Mul_UnitPre<N, Gtag>::f(t, x, y[i]);
c[N + 1] = AddPre<N + 1, Gtag>::f(c, c, t);
c[N + 1] = AddNC<N + 1, Gtag>::f(c, c, t);
q = c[0] * rp;
Mul_UnitPre<N, Gtag>::f(t, p, q);
AddPre<N + 2, Gtag>::f(c, c, t);
AddNC<N + 2, Gtag>::f(c, c, t);
c++;
}
if (c[N]) {
SubPre<N, Gtag>::f(z, c, p);
SubNC<N, Gtag>::f(z, c, p);
} else {
if (SubPre<N, Gtag>::f(z, c, p)) {
if (SubNC<N, Gtag>::f(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
@ -221,7 +248,7 @@ struct OpeFunc {
Unit *c = buf;
Unit q = xy[0] * rp;
Mul_UnitPre<N, Gtag>::f(t, p, q);
buf[N * 2] = AddPre<N * 2, Gtag>::f(buf, xy, t);
buf[N * 2] = AddNC<N * 2, Gtag>::f(buf, xy, t);
c++;
for (size_t i = 1; i < N; i++) {
q = c[0] * rp;
@ -231,9 +258,9 @@ struct OpeFunc {
c++;
}
if (c[N]) {
SubPre<N, Gtag>::f(z, c, p);
SubNC<N, Gtag>::f(z, c, p);
} else {
if (SubPre<N, Gtag>::f(z, c, p)) {
if (SubNC<N, Gtag>::f(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
@ -289,39 +316,48 @@ struct OpeFunc {
if (x != y) fp_clearC(y);
return;
}
fp_subC(y, p, x, p);
SubNC<N, Gtag>::f(y, p, x);
}
};
#ifdef MCL_USE_LLVM
#define SET_OP_LLVM(bit) \
#define SET_OP_LLVM /* assume n */ \
if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \
fp_add = mcl_fp_add ## bit ## L; \
fp_sub = mcl_fp_sub ## bit ## L; \
if (!isFullBit) { \
fp_addNC = mcl_fp_addNC ## bit ## L; \
fp_subNC = mcl_fp_subNC ## bit ## L; \
} \
fpDbl_mulPre = mcl_fpDbl_mulPre ## bit ## L; \
fp_mul_UnitPre = mcl_fp_mul_UnitPre ## bit ## L; \
fpDbl_sqrPre = mcl_fpDbl_sqrPre ## bit ## L; \
fp_add = Add<n, Ltag>::f; \
fp_sub = Sub<n, Ltag>::f; \
fpDbl_add = DblAdd<n, Ltag>::f; \
fpDbl_sub = DblSub<n, Ltag>::f; \
if (mode == FP_LLVM_MONT) { \
fpDbl_mod = mcl_fp_montRed ## bit ## L; \
fp_mul = mcl_fp_mont ## bit ## L; \
fp_mul = Mont<n, Ltag>::f; \
fp_sqr = SqrMont<n, Ltag>::f; \
fpDbl_mod = MontRed<n, Ltag>::f; \
} else { \
fp_mul = Mul<n, Ltag>::f; \
fp_sqr = Sqr<n, Ltag>::f; \
} \
fpDbl_mulPre = MulPre<n, Ltag>::f; \
fpDbl_sqrPre = SqrPre<n, Ltag>::f; \
fp_mul_UnitPre = Mul_UnitPre<n, Ltag>::f; \
if (!isFullBit) { \
fp_addNC = AddNC<n, Ltag>::f; \
fp_subNC = SubNC<n, Ltag>::f; \
} \
}
#define SET_OP_DBL_LLVM(bit, n2) \
#define SET_OP_LLVM2(bit) \
{ \
const int n = bit / UnitBitSize; \
if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \
fpDbl_add = mcl_fpDbl_add ## bit ## L; \
fpDbl_sub = mcl_fpDbl_sub ## bit ## L; \
if (!isFullBit) { \
fpDbl_addNC = mcl_fp_addNC ## n2 ## L; \
fpDbl_subNC = mcl_fp_subNC ## n2 ## L; \
fpDbl_addNC = AddNC<n * 2, Ltag>::f; \
fpDbl_subNC = SubNC<n * 2, Ltag>::f; \
} \
}
} \
}
#else
#define SET_OP_LLVM(bit)
#define SET_OP_DBL_LLVM(bit, n2)
#define SET_OP_LLVM
#define SET_OP_LLVM2(bit)
#endif
#define SET_OP(bit) \
@ -332,8 +368,8 @@ struct OpeFunc {
fp_clear = OpeFunc<bit>::fp_clearC; \
fp_copy = OpeFunc<bit>::fp_copyC; \
fp_neg = OpeFunc<bit>::fp_negC; \
fp_add = OpeFunc<bit>::fp_addC; \
fp_sub = OpeFunc<bit>::fp_subC; \
fp_add = Add<n, Gtag>::f; \
fp_sub = Sub<n, Gtag>::f; \
if (isMont) { \
fp_mul = OpeFunc<bit>::fp_mulMontC; \
fp_sqr = OpeFunc<bit>::fp_sqrMontC; \
@ -350,15 +386,15 @@ struct OpeFunc {
fpDbl_sqrPre = SqrPre<n, Gtag>::f; \
fp_mul_UnitPre = Mul_UnitPre<n, Gtag>::f; \
fpN1_mod = N1_Mod<n, Gtag>::f; \
fpDbl_add = OpeFunc<bit>::fpDbl_addC; \
fpDbl_sub = OpeFunc<bit>::fpDbl_subC; \
fpDbl_add = DblAdd<n, Gtag>::f; \
fpDbl_sub = DblSub<n, Gtag>::f; \
if (!isFullBit) { \
fp_addNC = AddPre<n, Gtag>::f; \
fp_subNC = SubPre<n, Gtag>::f; \
fpDbl_addNC = AddPre<n * 2, Gtag>::f; \
fpDbl_subNC = SubPre<n * 2, Gtag>::f; \
fp_addNC = AddNC<n, Gtag>::f; \
fp_subNC = SubNC<n, Gtag>::f; \
fpDbl_addNC = AddNC<n * 2, Gtag>::f; \
fpDbl_subNC = SubNC<n * 2, Gtag>::f; \
} \
SET_OP_LLVM(bit) \
SET_OP_LLVM \
}
#ifdef MCL_USE_XBYAK
@ -476,41 +512,26 @@ void Op::init(const std::string& mstr, size_t maxBitSize, Mode mode)
}
#endif
switch (roundBit) {
case 64: SET_OP(64); SET_OP_DBL_LLVM(64, 128); break;
case 128: SET_OP(128); SET_OP_DBL_LLVM(128, 256); break;
case 192: SET_OP(192); SET_OP_DBL_LLVM(192, 384); break;
case 256: SET_OP(256); SET_OP_DBL_LLVM(256, 512); break;
case 64: SET_OP(64); SET_OP_LLVM2(64); break;
case 128: SET_OP(128); SET_OP_LLVM2(128); break;
case 192: SET_OP(192); SET_OP_LLVM2(192); break;
case 256: SET_OP(256); SET_OP_LLVM2(256); break;
case 320: SET_OP(320); break;
case 384: SET_OP(384); break;
case 448: SET_OP(448); break;
case 512: SET_OP(512);
// QQQ : need refactor for large prime
#if MCL_MAX_OP_BIT_SIZE == 768
SET_OP_DBL_LLVM(512, 1024);
#endif
break;
case 512: SET_OP(512); break;
#if CYBOZU_OS_BIT == 64
case 576: SET_OP(576);
#if MCL_MAX_OP_BIT_SIZE == 768
SET_OP_DBL_LLVM(576, 1152);
#endif
break;
case 576: SET_OP(576); break;
#if MCL_MAX_OP_BIT_SIZE == 768
case 640: SET_OP(640);
SET_OP_DBL_LLVM(640, 1280);
break;
case 704: SET_OP(704);
SET_OP_DBL_LLVM(704, 1408);
break;
case 768: SET_OP(768);
SET_OP_DBL_LLVM(768, 1536);
break;
case 640: SET_OP(640); break;
case 704: SET_OP(704); break;
case 768: SET_OP(768); break;
#endif
#else
case 32: SET_OP(32); SET_OP_DBL_LLVM(32, 64); break;
case 96: SET_OP(96); SET_OP_DBL_LLVM(96, 192); break;
case 160: SET_OP(160); SET_OP_DBL_LLVM(160, 320); break;
case 224: SET_OP(224); SET_OP_DBL_LLVM(224, 448); break;
case 32: SET_OP(32); SET_OP_LLVM2(32); break;
case 96: SET_OP(96); SET_OP_LLVM2(96); break;
case 160: SET_OP(160); SET_OP_LLVM2(160); break;
case 224: SET_OP(224); SET_OP_LLVM2(224); break;
case 288: SET_OP(288); break;
case 352: SET_OP(352); break;
case 416: SET_OP(416); break;

@ -10,32 +10,124 @@
namespace mcl { namespace fp {
struct Ltag;
struct Atag;
// (carry, z[N]) <- x[N] + y[N]
template<size_t N, class Tag>class AddPre { static const u3u f; };
template<size_t N, class Tag>struct AddNC { static const u3u f; };
// (carry, z[N]) <- x[N] - y[N]
template<size_t N, class Tag>class SubPre { static const u3u f; };
template<size_t N, class Tag>struct SubNC { static const u3u f; };
// z[N * 2] <- x[N] * y[N]
template<size_t N, class Tag>class MulPre { static const void3u f; };
template<size_t N, class Tag>struct MulPre { static const void3u f; };
// z[N * 2] <- x[N] * x[N]
template<size_t N, class Tag>class SqrPre { static const void2u f; };
template<size_t N, class Tag>struct SqrPre { static const void2u f; };
// z[N + 1] <- x[N] * y
template<size_t N, class Tag>class Mul_UnitPre { static const void2uI f; };
template<size_t N, class Tag>struct Mul_UnitPre { static const void2uI f; };
// z[N] <- x[N + 1] % p[N]
template<size_t N, class Tag>class N1_Mod { static const void3u f; };
template<size_t N, class Tag>struct N1_Mod { static const void3u f; };
// z[N] <- x[N * 2] % p[N]
template<size_t N, class Tag>class Dbl_Mod { static const void3u f; };
template<size_t N, class Tag>struct Dbl_Mod { static const void3u f; };
// z[N] <- Montgomery(x[N], y[N], p[N])
// f takes (z, x, y, p)
template<size_t N, class Tag>struct Mont { static const void4u f; };
// z[N] <- MontRed(xy[N * 2], p[N])
// f takes (z, xy, p); xy is the double-width input, as for Dbl_Mod
template<size_t N, class Tag>struct MontRed { static const void3u f; };
// z[N] <- (x[N] * y[N]) % p[N]
// f takes (z, x, y, p)
template<size_t N, class Tag>struct Mul { static const void4u f; };
// y[N] <- (x[N] * x[N]) % p[N]
// f takes (y, x, p)
template<size_t N, class Tag>struct Sqr { static const void3u f; };
// y[N] <- Montgomery(x[N], x[N], p[N])
// Squaring expressed through the two-operand Montgomery routine of the same Tag.
template<size_t N, class Tag>
struct SqrMont {
	static const void3u f;
	static inline void func(Unit *y, const Unit *x, const Unit *p)
	{
		// both multiplicands are x
		Mont<N, Tag>::f(y, x, x, p);
	}
};
template<size_t N, class Tag>
const void3u SqrMont<N, Tag>::f = &SqrMont<N, Tag>::func;
// z[N] <- (x[N] + y[N]) % p[N]
template<size_t N, class Tag>
struct Add {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
if (AddNC<N, Tag>::f(z, x, y)) {
SubNC<N, Tag>::f(z, z, p);
return;
}
Unit tmp[N];
if (SubNC<N, Tag>::f(tmp, z, p) == 0) {
memcpy(z, tmp, sizeof(tmp));
}
}
static const void4u f;
};
template<size_t N, class Tag>
const void4u Add<N, Tag>::f = Add<N, Tag>::func;
// z[N] <- (x[N] - y[N]) % p[N]
template<size_t N, class Tag>
struct Sub {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
if (SubNC<N, Tag>::f(z, x, y)) {
AddNC<N, Tag>::f(z, z, p);
}
}
static const void4u f;
};
template<size_t N, class Tag>
const void4u Sub<N, Tag>::f = Sub<N, Tag>::func;
// z[N * 2] <- (x[N * 2] + y[N * 2]) mod p[N] << (N * UnitBitSize)
template<size_t N, class Tag>
struct DblAdd {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
if (AddNC<N * 2, Tag>::f(z, x, y)) {
SubNC<N, Tag>::f(z + N, z + N, p);
return;
}
Unit tmp[N];
if (SubNC<N, Tag>::f(tmp, z + N, p) == 0) {
memcpy(z + N, tmp, sizeof(tmp));
}
}
static const void4u f;
};
template<size_t N, class Tag>
const void4u DblAdd<N, Tag>::f = DblAdd<N, Tag>::func;
// z[N * 2] <- (x[N * 2] - y[N * 2]) mod p[N] << (N * UnitBitSize)
template<size_t N, class Tag>
struct DblSub {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
if (SubNC<N * 2, Tag>::f(z, x, y)) {
AddNC<N, Tag>::f(z + N, z + N, p);
}
}
static const void4u f;
};
template<size_t N, class Tag>
const void4u DblSub<N, Tag>::f = DblSub<N, Tag>::func;
} } // mcl::fp
#ifdef MCL_USE_LLVM
extern "C" {
#define MCL_FP_DEF_FUNC_SUB(len, suf) \
void mcl_fp_add ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_sub ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_addNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_subNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
mcl::fp::Unit mcl_fp_addNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
mcl::fp::Unit mcl_fp_subNC ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_mul_UnitPre ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl::fp::Unit y); \
void mcl_fpDbl_mulPre ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fpDbl_sqrPre ## len ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
@ -45,15 +137,16 @@ void mcl_fpDbl_add ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const
void mcl_fpDbl_sub ## len ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p);
#define MCL_FP_DEF_FUNC(len) \
MCL_FP_DEF_FUNC_SUB(len, G) \
MCL_FP_DEF_FUNC_SUB(len, L) \
MCL_FP_DEF_FUNC_SUB(len, A)
#define MCL_FP_DEF_FUNC_SPECIAL(suf) \
void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \
void mcl_fp_mul_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* /* dummy */); \
void mcl_fp_sqr_NIST_P192 ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x, const mcl::fp::Unit* /* dummy */); \
void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */);
void mcl_fpDbl_mod_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */); \
void mcl_fp_mul_NIST_P192 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* /* dummy */); \
void mcl_fp_sqr_NIST_P192 ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x, const mcl::fp::Unit* /* dummy */); \
void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* /* dummy */);
extern "C" {
MCL_FP_DEF_FUNC(64)
MCL_FP_DEF_FUNC(128)
@ -83,14 +176,13 @@ MCL_FP_DEF_FUNC(1408)
MCL_FP_DEF_FUNC(1536)
#endif
MCL_FP_DEF_FUNC_SPECIAL(G)
MCL_FP_DEF_FUNC_SPECIAL(L)
MCL_FP_DEF_FUNC_SPECIAL(A)
}
#undef MCL_FP_DEF_FUNC_SUB
#undef MCL_FP_DEF_FUNC
}
#endif

@ -296,57 +296,65 @@ struct Code : public mcl::Generator {
void gen_mcl_fp_addsubNC(bool isAdd)
{
resetGlobalIdx();
Operand r(Int, unit);
Operand pz(IntPtr, bit);
Operand px(IntPtr, bit);
Operand py(IntPtr, bit);
std::string name;
if (isAdd) {
name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L";
mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py);
mcl_fp_addNCM[bit] = Function(name, r, pz, px, py);
verifyAndSetPrivate(mcl_fp_addNCM[bit]);
beginFunc(mcl_fp_addNCM[bit]);
} else {
name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L";
mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py);
mcl_fp_subNCM[bit] = Function(name, r, pz, px, py);
verifyAndSetPrivate(mcl_fp_subNCM[bit]);
beginFunc(mcl_fp_subNCM[bit]);
}
Operand x = load(px);
Operand y = load(py);
Operand x = zext(load(px), bit + unit);
Operand y = zext(load(py), bit + unit);
Operand z;
if (isAdd) {
z = add(x, y);
store(trunc(z, bit), pz);
r = trunc(lshr(z, bit), unit);
} else {
z = sub(x, y);
store(trunc(z, bit), pz);
r = _and(trunc(lshr(z, bit), unit), makeImm(unit, 1));
}
store(z, pz);
ret(Void);
ret(r);
endFunc();
}
#if 0
void gen_mcl_fp_addS()
#if 0 // void-return version
void gen_mcl_fp_addsubNC(bool isAdd)
{
resetGlobalIdx();
Operand pz(IntPtr, bit);
Operand px(IntPtr, bit);
Operand py(IntPtr, bit);
Operand pp(IntPtr, bit);
std::string name = "mcl_fp_add" + cybozu::itoa(bit) + "S";
mcl_fp_addM[bit] = Function(name, Void, pz, px, py, pp);
beginFunc(mcl_fp_addM[bit]);
std::string name;
if (isAdd) {
name = "mcl_fp_addNC" + cybozu::itoa(bit) + "L";
mcl_fp_addNCM[bit] = Function(name, Void, pz, px, py);
verifyAndSetPrivate(mcl_fp_addNCM[bit]);
beginFunc(mcl_fp_addNCM[bit]);
} else {
name = "mcl_fp_subNC" + cybozu::itoa(bit) + "L";
mcl_fp_subNCM[bit] = Function(name, Void, pz, px, py);
verifyAndSetPrivate(mcl_fp_subNCM[bit]);
beginFunc(mcl_fp_subNCM[bit]);
}
Operand x = load(px);
Operand y = load(py);
Operand p = load(pp);
x = zext(x, bit + unit);
y = zext(y, bit + unit);
p = zext(p, bit + unit);
Operand t0 = add(x, y);
Operand t1 = sub(t0, p);
Operand t = lshr(t1, bit);
t = trunc(t, 1);
t = select(t, t0, t1);
t = trunc(t, bit);
store(t, pz);
Operand z;
if (isAdd) {
z = add(x, y);
} else {
z = sub(x, y);
}
store(z, pz);
ret(Void);
endFunc();
}
@ -385,33 +393,6 @@ struct Code : public mcl::Generator {
ret(Void);
endFunc();
}
#if 0
/*
	Emit the function "mcl_fp_sub" + bit + "S" taking (pz, px, py, pp) and returning Void.
	In statement order: load x and y, widen both to bit + unit bits, subtract,
	then add either p or 0 to the low `bit` bits of the difference and store it through pz.
	NOTE(review): presumably this is a branch-free modular subtraction in which the
	borrow is read from bit position `bit` of the widened difference - confirm against
	the helper semantics (load/zext/lshr/trunc/select are defined outside this view).
	This definition sits between an `#if 0` and its `#endif`.
*/
void gen_mcl_fp_subS()
{
resetGlobalIdx();
// all four arguments are pointers to `bit`-wide integers
Operand pz(IntPtr, bit);
Operand px(IntPtr, bit);
Operand py(IntPtr, bit);
Operand pp(IntPtr, bit);
std::string name = "mcl_fp_sub" + cybozu::itoa(bit) + "S";
// register the function in the per-bit table before opening its body
mcl_fp_subM[bit] = Function(name, Void, pz, px, py, pp);
beginFunc(mcl_fp_subM[bit]);
Operand x = load(px);
Operand y = load(py);
// widen by one unit so the subtraction has room above bit position `bit`
x = zext(x, bit + unit);
y = zext(y, bit + unit);
Operand vc = sub(x, y);
Operand v = trunc(vc, bit); // v = x - y
// c is the single bit of vc at position `bit`
Operand c = lshr(vc, bit);
c = trunc(c, 1);
Operand p = load(pp);
// z is p when c is set, otherwise 0
Operand z = select(c, p, makeImm(bit, 0));
v = add(v, z);
store(v, pz);
ret(Void);
endFunc();
}
#endif
void gen_mcl_fp_sub()
{
resetGlobalIdx();

@ -7,7 +7,7 @@ namespace mcl { namespace fp {
struct Gtag;
template<size_t N>
struct AddPre<N, Gtag> {
struct AddNC<N, Gtag> {
static inline Unit func(Unit *z, const Unit *x, const Unit *y)
{
return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
@ -16,10 +16,10 @@ struct AddPre<N, Gtag> {
};
template<size_t N>
const u3u AddPre<N, Gtag>::f = &AddPre<N, Gtag>::func;
const u3u AddNC<N, Gtag>::f = &AddNC<N, Gtag>::func;
template<size_t N>
struct SubPre<N, Gtag> {
struct SubNC<N, Gtag> {
static inline Unit func(Unit *z, const Unit *x, const Unit *y)
{
return mpn_sub_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N);
@ -28,7 +28,7 @@ struct SubPre<N, Gtag> {
};
template<size_t N>
const u3u SubPre<N, Gtag>::f = &SubPre<N, Gtag>::func;
const u3u SubNC<N, Gtag>::f = &SubNC<N, Gtag>::func;
template<size_t N>
struct MulPre<N, Gtag> {

Loading…
Cancel
Save