rename fp_mod to fpDbl_mod

dev
MITSUNARI Shigeo 9 years ago
parent afb089ffe1
commit 98aa972ebc
  1. 20
      include/mcl/fp.hpp
  2. 20
      include/mcl/fp_generator.hpp
  3. 4
      include/mcl/fp_proto.hpp
  4. 4
      include/mcl/fp_tower.hpp
  5. 14
      include/mcl/op.hpp
  6. 2
      sample/rawbench.cpp
  7. 16
      src/fp.cpp
  8. 4
      src/mul.txt
  9. 6
      src/once.txt

@ -87,7 +87,7 @@ public:
op_.fpDbl_add = fpDbl_addW;
op_.fpDbl_sub = fpDbl_subW;
op_.fp_mul = fp_mulW;
op_.fp_mod = fp_modW;
op_.fpDbl_mod = fpDbl_modW;
/*
priority : MCL_USE_XBYAK > MCL_USE_LLVM > none
Xbyak > llvm_opt > llvm > gmp
@ -108,7 +108,7 @@ public:
if (mode == fp::FP_LLVM_MONT) {
op_.fp_mul = fp_montW;
op_.fp_sqr = fp_montSqrW;
op_.fp_mod = fp_montRedW;
op_.fpDbl_mod = fp_montRedW;
}
#if 0
fprintf(stderr, "mode=%d, isMont=%d"
@ -466,9 +466,9 @@ private:
op_.fpDbl_subP(z, x, y, op_.p);
}
// z[N] <- xy[N * 2] % p[N]
static inline void fp_modW(Unit *z, const Unit *xy)
static inline void fpDbl_modW(Unit *z, const Unit *xy)
{
op_.fp_modP(z, xy, op_.p);
op_.fpDbl_modP(z, xy, op_.p);
}
// z[N] <- montRed(xy[N * 2])
static inline void fp_montRedW(Unit *z, const Unit *xy)
@ -478,14 +478,14 @@ private:
static inline void fp_mulW(Unit *z, const Unit *x, const Unit *y)
{
Unit xy[maxSize * 2];
op_.fp_mulPre(xy, x, y);
fp_modW(z, xy);
op_.fpDbl_mulPre(xy, x, y);
fpDbl_modW(z, xy);
}
static inline void fp_sqrW(Unit *y, const Unit *x)
{
Unit xx[maxSize * 2];
op_.fp_sqrPre(xx, x);
fp_modW(y, xx);
op_.fpDbl_sqrPre(xx, x);
fpDbl_modW(y, xx);
}
static inline void fp_negW(Unit *y, const Unit *x)
{
@ -498,7 +498,7 @@ private:
op_.montPU(z, x, y, op_.p, op_.rp);
#else
Unit xy[maxSize * 2];
op_.fp_mulPre(xy, x, y);
op_.fpDbl_mulPre(xy, x, y);
fp_montRedW(z, xy);
#endif
}
@ -508,7 +508,7 @@ private:
op_.montPU(y, x, x, op_.p, op_.rp);
#else
Unit xx[maxSize * 2];
op_.fp_sqrPre(xx, x);
op_.fpDbl_sqrPre(xx, x);
fp_montRedW(y, xx);
#endif
}

@ -250,13 +250,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
}
if (op.N == 3 || op.N == 4) {
align(16);
op.fp_mod = getCurr<void2u>();
gen_fp_mod();
op.fpDbl_mod = getCurr<void2u>();
gen_fpDbl_mod();
}
if (op.N == 3 || op.N == 4) {
align(16);
op.fp_mulPre = getCurr<void3u>();
gen_fp_mulPre();
op.fpDbl_mulPre = getCurr<void3u>();
gen_fpDbl_mulPre();
}
}
void gen_addSubNC(bool isAdd, int n)
@ -607,7 +607,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
z[2..0] <- montgomery reduction(x[5..0])
@note destroy rax, rdx, t0, ..., t10
*/
void gen_fp_mod3()
void gen_fpDbl_mod3()
{
StackFrame sf(this, 3, 10 | UseRDX);
const Reg64& z = sf.p[0];
@ -715,7 +715,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
@note destroy rax, rdx, t0, ..., t10, xm0, xm1
xm2 if isFullBit_
*/
void gen_fp_mod4()
void gen_fpDbl_mod4()
{
StackFrame sf(this, 3, 10 | UseRDX);
const Reg64& z = sf.p[0];
@ -831,13 +831,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
movq(z, xm0);
store_mr(z, Pack(t10, t9, t8, t4));
}
void gen_fp_mod()
void gen_fpDbl_mod()
{
assert(pn_ == 3 || pn_ == 4);
if (pn_ == 3) {
gen_fp_mod3();
gen_fpDbl_mod3();
} else if (pn_ == 4) {
gen_fp_mod4();
gen_fpDbl_mod4();
}
}
void gen_sqr()
@ -1197,7 +1197,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
store_mr(pz + 8 * 3, Pack(t7, t8, t3, t2));
mov(ptr [pz + 8 * 7], d);
}
void gen_fp_mulPre()
void gen_fpDbl_mulPre()
{
if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX);

@ -19,8 +19,8 @@ void mcl_fp_sub ## len ## S(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl:
void mcl_fp_sub ## len ## L(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_addNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_subNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_mulPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_sqrPre ## len(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
void mcl_fpDbl_mulPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fpDbl_sqrPre ## len(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
void mcl_fp_mont ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p, mcl::fp::Unit r); \
void mcl_fp_montRed ## len(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p, mcl::fp::Unit r);

@ -47,8 +47,8 @@ public:
/*
mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
*/
static inline void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fp_mulPre(xy.v_, x.v_, y.v_); }
static inline void mod(Fp& z, const FpDblT& xy) { Fp::op_.fp_mod(z.v_, xy.v_); }
static inline void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
static inline void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_); }
};
/*

@ -106,7 +106,7 @@ struct Op {
Unit rp;
// z = montRed(xy)
void (*montRedPU)(Unit *z, const Unit *xy, const Unit *p, Unit rp);
// z = mont(x, y) = montRed(fp_mulPre(x, y))
// z = mont(x, y) = montRed(fpDbl_mulPre(x, y))
void (*montPU)(Unit *z, const Unit *x, const Unit *y, const Unit *p, Unit rp);
// require p
@ -114,7 +114,7 @@ struct Op {
void2uOp fp_invOp;
void4u fp_addP;
void4u fp_subP;
void3u fp_modP;
void3u fpDbl_modP;
FpGenerator *fg;
/*
@ -130,9 +130,9 @@ struct Op {
/*
FpDbl <=> Fp
*/
void2u fp_sqrPre;
void3u fp_mulPre;
void2u fp_mod;
void2u fpDbl_sqrPre;
void3u fpDbl_mulPre;
void2u fpDbl_mod;
/*
for Fp2 = F[u] / (u^2 + 1)
@ -154,12 +154,12 @@ struct Op {
, isFullBit(true), fp_addNC(0), fp_subNC(0)
, isMont(false), fp_preInv(0)
, rp(0), montRedPU(0), montPU(0)
, fp_negP(0), fp_invOp(0), fp_addP(0), fp_subP(0), fp_modP(0)
, fp_negP(0), fp_invOp(0), fp_addP(0), fp_subP(0), fpDbl_modP(0)
, fg(createFpGenerator())
, fpDbl_add(0), fpDbl_sub()
, fpDbl_addP(0), fpDbl_subP()
, fpDbl_addNC(0), fpDbl_subNC()
, fp_sqrPre(0), fp_mulPre(0), fp_mod(0)
, fpDbl_sqrPre(0), fpDbl_mulPre(0), fpDbl_mod(0)
, xi_c(0)
, fp2_add(0), fp2_sub(0), fp2_mul(0), fp2_neg(0)
, fp2_sqr(0), fp2_mul_xi(0)

@ -52,7 +52,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
memcpy(uy, fy.getUnit(), sizeof(fy));
double fp_sqrT, fp_addT, fp_subT, fp_mulT;
double fpDbl_addT, fpDbl_subT;
// double fp_sqrPreT, fp_mulPreT, fp_modT;
// double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT;
// double fp2_mulT, fp2_sqrT;
// double fp_addNCT, fp_subNCT, fpDbl_addNCT,fpDbl_subNCT;
CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, ux, ux);

@ -190,7 +190,7 @@ struct OpeFunc {
Gmp::getArray(z, N, mz);
}
// z[N * 2] <- x[N] * y[N]
static inline void fp_mulPreC(Unit *z, const Unit *x, const Unit *y)
static inline void fpDbl_mulPreC(Unit *z, const Unit *x, const Unit *y)
{
mpz_t mx, my, mz;
set_zero(mz, z, N * 2);
@ -200,7 +200,7 @@ struct OpeFunc {
clearArray(z, mz->_mp_size, N * 2);
}
// y[N * 2] <- x[N]^2
static inline void fp_sqrPreC(Unit *y, const Unit *x)
static inline void fpDbl_sqrPreC(Unit *y, const Unit *x)
{
mpz_t mx, my;
set_zero(my, y, N * 2);
@ -209,7 +209,7 @@ struct OpeFunc {
clearArray(y, my->_mp_size, N * 2);
}
// y[N] <- x[N * 2] mod p[N]
static inline void fp_modPC(Unit *y, const Unit *x, const Unit *p)
static inline void fpDbl_modPC(Unit *y, const Unit *x, const Unit *p)
{
mpz_t mx, my, mp;
set_mpz_t(mx, x, N * 2);
@ -258,9 +258,9 @@ struct OpeFunc {
fp_addNC = mcl_fp_addNC ## n; \
fp_subNC = mcl_fp_subNC ## n; \
} \
fp_mulPre = mcl_fp_mulPre ## n; \
fpDbl_mulPre = mcl_fpDbl_mulPre ## n; \
if (n <= 256) { \
fp_sqrPre = mcl_fp_sqrPre ## n; \
fpDbl_sqrPre = mcl_fpDbl_sqrPre ## n; \
} \
montPU = mcl_fp_mont ## n; \
montRedPU = mcl_fp_montRed ## n; \
@ -311,9 +311,9 @@ struct OpeFunc {
fpDbl_subNC = OpeFunc<n * 2>::fp_subNCC; \
} \
} \
fp_mulPre = OpeFunc<n>::fp_mulPreC; \
fp_sqrPre = OpeFunc<n>::fp_sqrPreC; \
fp_modP = OpeFunc<n>::fp_modPC; \
fpDbl_mulPre = OpeFunc<n>::fpDbl_mulPreC; \
fpDbl_sqrPre = OpeFunc<n>::fpDbl_sqrPreC; \
fpDbl_modP = OpeFunc<n>::fpDbl_modPC; \
SET_OP_LLVM(n)
#ifdef MCL_USE_XBYAK

@ -18,7 +18,7 @@ noinline
@endfor
ret i$(bu) %t$(N-2)
}
define void @mcl_fp_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) {
define void @mcl_fpDbl_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) {
%x = load i$(bit)* %px
%y = load i$(bit)* %py
@for i, 0, N
@ -42,7 +42,7 @@ define void @mcl_fp_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) {
store i$(bu) %sum$(N-1), i$(bu)* %p
ret void
}
define void @mcl_fp_sqrPre$(bit)(i$(unit)* %py, i$(bit)* %px) {
define void @mcl_fpDbl_sqrPre$(bit)(i$(unit)* %py, i$(bit)* %px) {
%x = load i$(bit)* %px
@for i, 0, N
%x$(i) = call i$(unit) @extract$(bit)(i$(bit) %x, i$(bit) $(unit*i))

@ -30,7 +30,7 @@ define internal i64 @extract192to64(i192 %x, i192 %shift) {
ret i64 %t1
}
define internal void @modNIST_P192(i192* %out, i192* %px) {
define internal void @fpDbl_modNIST_P192(i192* %out, i192* %px) {
%L192 = load i192* %px
%L = zext i192 %L192 to i256
@ -68,7 +68,7 @@ define internal void @modNIST_P192(i192* %out, i192* %px) {
define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) {
%buf = alloca i192, i32 2
%p = bitcast i192* %buf to i$(unit)*
call void @mcl_fp_mulPre192(i$(unit)* %p, i192* %px, i192* %py)
call void @modNIST_P192(i192* %pz, i192* %buf)
call void @mcl_fpDbl_mulPre192(i$(unit)* %p, i192* %px, i192* %py)
call void @fpDbl_modNIST_P192(i192* %pz, i192* %buf)
ret void
}

Loading…
Cancel
Save