rename mulI to mulU

dev
MITSUNARI Shigeo 9 years ago
parent dece687158
commit fd519581e8
  1. 8
      include/mcl/fp.hpp
  2. 8
      include/mcl/op.hpp
  3. 8
      sample/rawbench.cpp
  4. 6
      src/fp.cpp
  5. 20
      src/fp_generator.hpp
  6. 2
      src/fp_proto.hpp
  7. 2
      src/mul.txt
  8. 4
      test/fp_generator_test.cpp
  9. 10
      test/mont_fp_test.cpp

@ -87,7 +87,7 @@ public:
op_.fpDbl_add = fpDbl_addW;
op_.fpDbl_sub = fpDbl_subW;
op_.fp_mul = fp_mulW;
op_.fp_mulI = fp_mulIW;
op_.fp_mulU = fp_mulUW;
op_.fpDbl_mod = fpDbl_modW;
/*
priority : MCL_USE_XBYAK > MCL_USE_LLVM > none
@ -307,7 +307,7 @@ public:
static inline void addNC(FpT& z, const FpT& x, const FpT& y) { op_.fp_addNC(z.v_, x.v_, y.v_); }
static inline void subNC(FpT& z, const FpT& x, const FpT& y) { op_.fp_subNC(z.v_, x.v_, y.v_); }
static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.fp_mul(z.v_, x.v_, y.v_); }
static inline void mulI(FpT& z, const FpT& x, const Unit y) { op_.fp_mulI(z.v_, x.v_, y); }
static inline void mulU(FpT& z, const FpT& x, const Unit y) { op_.fp_mulU(z.v_, x.v_, y); }
static inline void inv(FpT& y, const FpT& x) { op_.fp_invOp(y.v_, x.v_, op_); }
static inline void neg(FpT& y, const FpT& x) { op_.fp_neg(y.v_, x.v_); }
static inline void sqr(FpT& y, const FpT& x) { op_.fp_sqr(y.v_, x.v_); }
@ -482,10 +482,10 @@ private:
{
op_.montRedPU(z, xy, op_.p, op_.rp);
}
static inline void fp_mulIW(Unit *z, const Unit *x, Unit y)
static inline void fp_mulUW(Unit *z, const Unit *x, Unit y)
{
Unit xy[maxSize + 1];
op_.fp_mulIPre(xy, x, y);
op_.fp_mulUPre(xy, x, y);
fpN1_modW(z, xy);
}
static inline void fp_mulW(Unit *z, const Unit *x, const Unit *y)

@ -99,9 +99,9 @@ struct Op {
void3u fp_add;
void3u fp_sub;
void3u fp_mul;
void2uI fp_mulIPre; // z[N + 1] = x[N] * y
void2uI fp_mulUPre; // z[N + 1] = x[N] * y
void3u fpN1_modP; // y[N] = x[N + 1] % p[N]
void2uI fp_mulI; // fpN1_modP + fp_mulIPre
void2uI fp_mulU; // fpN1_modP + fp_mulUPre
bool isFullBit; // true if bitSize % unitSize == 0
bool isMont; // true if Montgomery is used
@ -163,9 +163,9 @@ struct Op {
: N(0), bitSize(0)
, fp_isZero(0), fp_clear(0), fp_copy(0)
, fp_neg(0), fp_sqr(0), fp_add(0), fp_sub(0), fp_mul(0)
, fp_mulIPre(0)
, fp_mulUPre(0)
, fpN1_modP(0)
, fp_mulI(0)
, fp_mulU(0)
, isFullBit(false)
, isMont(false)
, primeMode(PM_GENERIC)

@ -40,7 +40,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
memcpy(ux + op.N, fx.getUnit(), sizeof(Unit) * op.N);
memcpy(uy, fy.getUnit(), sizeof(Unit) * op.N);
memcpy(ux + op.N, fx.getUnit(), sizeof(Unit) * op.N);
double fp_sqrT, fp_addT, fp_subT, fp_mulT, fp_mulIT;
double fp_sqrT, fp_addT, fp_subT, fp_mulT, fp_mulUT;
double fpDbl_addT, fpDbl_subT;
double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT;
double fp2_sqrT, fp2_mulT;
@ -48,7 +48,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
CYBOZU_BENCH_T(fp_addT, op.fp_add, uz, ux, uy);
CYBOZU_BENCH_T(fp_subT, op.fp_sub, uz, uy, ux);
CYBOZU_BENCH_T(fp_mulT, op.fp_mul, uz, ux, uy);
CYBOZU_BENCH_T(fp_mulIT, op.fp_mulI, uz, ux, 12345678);
CYBOZU_BENCH_T(fp_mulUT, op.fp_mulU, uz, ux, 12345678);
CYBOZU_BENCH_T(fpDbl_addT, op.fpDbl_add, uz, ux, uy);
CYBOZU_BENCH_T(fpDbl_subT, op.fpDbl_sub, uz, uy, ux);
CYBOZU_BENCH_T(fpDbl_sqrPreT, op.fpDbl_sqrPre, uz, ux);
@ -62,7 +62,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
CYBOZU_BENCH_T(fp2_mulT, Fp2::mul, f2x, f2x, f2y);
printf("%s\n", getModeStr(mode));
const char *tStrTbl[] = {
"fp_add", "fp_sub", "fp_sqr", "fp_mul", "fp_mulI",
"fp_add", "fp_sub", "fp_sqr", "fp_mul", "fp_mulU",
"D_add", "D_sub",
"D_sqrPre", "D_mulPre", "D_mod",
"fp2_sqr", "fp2_mul",
@ -72,7 +72,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
}
printf("\n");
const double tTbl[] = {
fp_addT, fp_subT, fp_sqrT, fp_mulT, fp_mulIT,
fp_addT, fp_subT, fp_sqrT, fp_mulT, fp_mulUT,
fpDbl_addT, fpDbl_subT,
fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT,
fp2_sqrT, fp2_mulT,

@ -194,7 +194,7 @@ struct OpeFunc {
Gmp::getArray(z, N, mz);
}
// z[N + 1] <- x[N] * y
static inline void fp_mulIPreC(Unit *z, const Unit *x, Unit y)
static inline void fp_mulUPreC(Unit *z, const Unit *x, Unit y)
{
mpz_t mx, mz;
set_zero(mz, z, N + 1);
@ -282,7 +282,7 @@ struct OpeFunc {
fp_subNC = mcl_fp_subNC ## n; \
} \
fpDbl_mulPre = mcl_fpDbl_mulPre ## n; \
fp_mulIPre = mcl_fp_mulIPre ## n; \
fp_mulUPre = mcl_fp_mulUPre ## n; \
if (n <= 256) { \
fpDbl_sqrPre = mcl_fpDbl_sqrPre ## n; \
} \
@ -335,7 +335,7 @@ struct OpeFunc {
fpDbl_subNC = OpeFunc<n * 2>::fp_subNCC; \
} \
} \
fp_mulIPre = OpeFunc<n>::fp_mulIPreC; \
fp_mulUPre = OpeFunc<n>::fp_mulUPreC; \
fpN1_modP = OpeFunc<n>::fpN1_modPC; \
fpDbl_mulPre = OpeFunc<n>::fpDbl_mulPreC; \
fpDbl_sqrPre = OpeFunc<n>::fpDbl_sqrPreC; \

@ -161,7 +161,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
// preInv
typedef int (*int2op)(uint64_t*, const uint64_t*);
void3u mul_;
uint3opI mulI_;
uint3opI mulU_;
void *montRedRaw_;
void2op shr1_;
FpGenerator()
@ -172,7 +172,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
, pn_(0)
, isFullBit_(0)
, mul_(0)
, mulI_(0)
, mulU_(0)
, montRedRaw_(0)
, shr1_(0)
{
@ -219,8 +219,8 @@ struct FpGenerator : Xbyak::CodeGenerator {
gen_neg();
align(16);
mulI_ = getCurr<uint3opI>();
gen_mulI();
mulU_ = getCurr<uint3opI>();
gen_mulU();
if (op.primeMode == PM_NICT_P521) {
align(16);
op.fpDbl_mod = getCurr<void2u>();
@ -345,7 +345,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
wk[0] if useMulx_
wk[0..n-2] otherwise
*/
void gen_raw_mulI(const RegExp& pz, const RegExp& px, const Reg64& y, const MixPack& wk, const Reg64& t, size_t n)
void gen_raw_mulU(const RegExp& pz, const RegExp& px, const Reg64& y, const MixPack& wk, const Reg64& t, size_t n)
{
assert(n >= 2);
if (n == 2) {
@ -407,7 +407,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
mov(ptr [pz + (n - 1) * 8], rax);
adc(rdx, 0);
}
void gen_mulI()
void gen_mulU()
{
assert(pn_ >= 2);
const int regNum = useMulx_ ? 2 : (1 + std::min(pn_ - 1, 8));
@ -419,7 +419,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
size_t rspPos = 0;
Pack remain = sf.t.sub(1);
MixPack wk(remain, rspPos, pn_ - 1);
gen_raw_mulI(pz, px, y, wk, sf.t[0], pn_);
gen_raw_mulU(pz, px, y, wk, sf.t[0], pn_);
mov(rax, rdx);
}
/*
@ -2472,10 +2472,10 @@ private:
{
// pc[] += x[] * y
if (isFirst) {
gen_raw_mulI(pc, px, y, pw1, t, n);
gen_raw_mulU(pc, px, y, pw1, t, n);
mov(ptr [pc + n * 8], rdx);
} else {
gen_raw_mulI(pw2, px, y, pw1, t, n);
gen_raw_mulU(pw2, px, y, pw1, t, n);
mov(t, ptr [pw2 + 0 * 8]);
add(ptr [pc + 0 * 8], t);
for (int i = 1; i < n; i++) {
@ -2492,7 +2492,7 @@ private:
mov(rax, pp);
mul(qword [pc]);
mov(y, rax); // y = q
gen_raw_mulI(pw2, p, y, pw1, t, n);
gen_raw_mulU(pw2, p, y, pw1, t, n);
// c[] = (c[] + pw2[]) >> 64
mov(t, ptr [pw2 + 0 * 8]);
add(t, ptr [pc + 0 * 8]);

@ -19,7 +19,7 @@ void mcl_fp_sub ## len ## S(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl:
void mcl_fp_sub ## len ## L(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_addNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_subNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_mulIPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl::fp::Unit y); \
void mcl_fp_mulUPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl::fp::Unit y); \
void mcl_fpDbl_mulPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fpDbl_sqrPre ## len(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
void mcl_fp_mont ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p, mcl::fp::Unit r); \

@ -18,7 +18,7 @@ noinline
@endfor
ret i$(bu) %t$(N-2)
}
define void @mcl_fp_mulIPre$(bit)(i$(bu)* %pz, i$(bit)* %px, i$(unit) %y)
define void @mcl_fp_mulUPre$(bit)(i$(bu)* %pz, i$(bit)* %px, i$(unit) %y)
{
%x = load i$(bit)* %px
%z = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y)

@ -150,7 +150,7 @@ void testMulI(const mcl::fp::FpGenerator& fg, int pn)
mpz_class my;
mcl::Gmp::set(my, y);
mx *= my;
uint64_t d = fg.mulI_(z, x, y);
uint64_t d = fg.mulU_(z, x, y);
z[pn] = d;
mcl::Gmp::setArray(my, z, pn + 1);
CYBOZU_TEST_EQUAL(mx, my);
@ -160,7 +160,7 @@ void testMulI(const mcl::fp::FpGenerator& fg, int pn)
uint64_t z[MAX_N + 1];
rg.read(x, pn);
uint64_t y = rg.get64();
CYBOZU_BENCH_C("mulI", 10000000, fg.mulI_, z, x, y);
CYBOZU_BENCH_C("mulU", 10000000, fg.mulU_, z, x, y);
}
}

@ -125,7 +125,7 @@ struct Test {
modulo();
ope();
power();
mulI();
mulU();
power_Zn();
setArray();
set64bit();
@ -489,12 +489,12 @@ struct Test {
z *= x;
}
}
void mulI()
void mulU()
{
Fp x(-1), y, z;
for (int i = 0; i < 20; i++) {
Fp::mul(y, x, i);
Fp::mulI(z, x, i);
for (unsigned int u = 0; u < 20; u++) {
Fp::mul(y, x, u);
Fp::mulU(z, x, u);
CYBOZU_TEST_EQUAL(y, z);
}
}

Loading…
Cancel
Save