rename fp_mod to fpDbl_mod (also fp_modP, fp_mulPre, fp_sqrPre and modNIST_P192 to their fpDbl_ equivalents)

dev
MITSUNARI Shigeo 9 years ago
parent afb089ffe1
commit 98aa972ebc
  1. 20
      include/mcl/fp.hpp
  2. 20
      include/mcl/fp_generator.hpp
  3. 4
      include/mcl/fp_proto.hpp
  4. 4
      include/mcl/fp_tower.hpp
  5. 14
      include/mcl/op.hpp
  6. 2
      sample/rawbench.cpp
  7. 16
      src/fp.cpp
  8. 4
      src/mul.txt
  9. 6
      src/once.txt

@ -87,7 +87,7 @@ public:
op_.fpDbl_add = fpDbl_addW; op_.fpDbl_add = fpDbl_addW;
op_.fpDbl_sub = fpDbl_subW; op_.fpDbl_sub = fpDbl_subW;
op_.fp_mul = fp_mulW; op_.fp_mul = fp_mulW;
op_.fp_mod = fp_modW; op_.fpDbl_mod = fpDbl_modW;
/* /*
priority : MCL_USE_XBYAK > MCL_USE_LLVM > none priority : MCL_USE_XBYAK > MCL_USE_LLVM > none
Xbyak > llvm_opt > llvm > gmp Xbyak > llvm_opt > llvm > gmp
@ -108,7 +108,7 @@ public:
if (mode == fp::FP_LLVM_MONT) { if (mode == fp::FP_LLVM_MONT) {
op_.fp_mul = fp_montW; op_.fp_mul = fp_montW;
op_.fp_sqr = fp_montSqrW; op_.fp_sqr = fp_montSqrW;
op_.fp_mod = fp_montRedW; op_.fpDbl_mod = fp_montRedW;
} }
#if 0 #if 0
fprintf(stderr, "mode=%d, isMont=%d" fprintf(stderr, "mode=%d, isMont=%d"
@ -466,9 +466,9 @@ private:
op_.fpDbl_subP(z, x, y, op_.p); op_.fpDbl_subP(z, x, y, op_.p);
} }
// z[N] <- xy[N * 2] % p[N] // z[N] <- xy[N * 2] % p[N]
static inline void fp_modW(Unit *z, const Unit *xy) static inline void fpDbl_modW(Unit *z, const Unit *xy)
{ {
op_.fp_modP(z, xy, op_.p); op_.fpDbl_modP(z, xy, op_.p);
} }
// z[N] <- montRed(xy[N * 2]) // z[N] <- montRed(xy[N * 2])
static inline void fp_montRedW(Unit *z, const Unit *xy) static inline void fp_montRedW(Unit *z, const Unit *xy)
@ -478,14 +478,14 @@ private:
static inline void fp_mulW(Unit *z, const Unit *x, const Unit *y) static inline void fp_mulW(Unit *z, const Unit *x, const Unit *y)
{ {
Unit xy[maxSize * 2]; Unit xy[maxSize * 2];
op_.fp_mulPre(xy, x, y); op_.fpDbl_mulPre(xy, x, y);
fp_modW(z, xy); fpDbl_modW(z, xy);
} }
static inline void fp_sqrW(Unit *y, const Unit *x) static inline void fp_sqrW(Unit *y, const Unit *x)
{ {
Unit xx[maxSize * 2]; Unit xx[maxSize * 2];
op_.fp_sqrPre(xx, x); op_.fpDbl_sqrPre(xx, x);
fp_modW(y, xx); fpDbl_modW(y, xx);
} }
static inline void fp_negW(Unit *y, const Unit *x) static inline void fp_negW(Unit *y, const Unit *x)
{ {
@ -498,7 +498,7 @@ private:
op_.montPU(z, x, y, op_.p, op_.rp); op_.montPU(z, x, y, op_.p, op_.rp);
#else #else
Unit xy[maxSize * 2]; Unit xy[maxSize * 2];
op_.fp_mulPre(xy, x, y); op_.fpDbl_mulPre(xy, x, y);
fp_montRedW(z, xy); fp_montRedW(z, xy);
#endif #endif
} }
@ -508,7 +508,7 @@ private:
op_.montPU(y, x, x, op_.p, op_.rp); op_.montPU(y, x, x, op_.p, op_.rp);
#else #else
Unit xx[maxSize * 2]; Unit xx[maxSize * 2];
op_.fp_sqrPre(xx, x); op_.fpDbl_sqrPre(xx, x);
fp_montRedW(y, xx); fp_montRedW(y, xx);
#endif #endif
} }

@ -250,13 +250,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
} }
if (op.N == 3 || op.N == 4) { if (op.N == 3 || op.N == 4) {
align(16); align(16);
op.fp_mod = getCurr<void2u>(); op.fpDbl_mod = getCurr<void2u>();
gen_fp_mod(); gen_fpDbl_mod();
} }
if (op.N == 3 || op.N == 4) { if (op.N == 3 || op.N == 4) {
align(16); align(16);
op.fp_mulPre = getCurr<void3u>(); op.fpDbl_mulPre = getCurr<void3u>();
gen_fp_mulPre(); gen_fpDbl_mulPre();
} }
} }
void gen_addSubNC(bool isAdd, int n) void gen_addSubNC(bool isAdd, int n)
@ -607,7 +607,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
z[2..0] <- montgomery reduction(x[5..0]) z[2..0] <- montgomery reduction(x[5..0])
@note destroy rax, rdx, t0, ..., t10 @note destroy rax, rdx, t0, ..., t10
*/ */
void gen_fp_mod3() void gen_fpDbl_mod3()
{ {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);
const Reg64& z = sf.p[0]; const Reg64& z = sf.p[0];
@ -715,7 +715,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
@note destroy rax, rdx, t0, ..., t10, xm0, xm1 @note destroy rax, rdx, t0, ..., t10, xm0, xm1
xm2 if isFullBit_ xm2 if isFullBit_
*/ */
void gen_fp_mod4() void gen_fpDbl_mod4()
{ {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);
const Reg64& z = sf.p[0]; const Reg64& z = sf.p[0];
@ -831,13 +831,13 @@ struct FpGenerator : Xbyak::CodeGenerator {
movq(z, xm0); movq(z, xm0);
store_mr(z, Pack(t10, t9, t8, t4)); store_mr(z, Pack(t10, t9, t8, t4));
} }
void gen_fp_mod() void gen_fpDbl_mod()
{ {
assert(pn_ == 3 || pn_ == 4); assert(pn_ == 3 || pn_ == 4);
if (pn_ == 3) { if (pn_ == 3) {
gen_fp_mod3(); gen_fpDbl_mod3();
} else if (pn_ == 4) { } else if (pn_ == 4) {
gen_fp_mod4(); gen_fpDbl_mod4();
} }
} }
void gen_sqr() void gen_sqr()
@ -1197,7 +1197,7 @@ struct FpGenerator : Xbyak::CodeGenerator {
store_mr(pz + 8 * 3, Pack(t7, t8, t3, t2)); store_mr(pz + 8 * 3, Pack(t7, t8, t3, t2));
mov(ptr [pz + 8 * 7], d); mov(ptr [pz + 8 * 7], d);
} }
void gen_fp_mulPre() void gen_fpDbl_mulPre()
{ {
if (pn_ == 3) { if (pn_ == 3) {
StackFrame sf(this, 3, 10 | UseRDX); StackFrame sf(this, 3, 10 | UseRDX);

@ -19,8 +19,8 @@ void mcl_fp_sub ## len ## S(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl:
void mcl_fp_sub ## len ## L(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ void mcl_fp_sub ## len ## L(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_addNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fp_addNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_subNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fp_subNC ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_mulPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \ void mcl_fpDbl_mulPre ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fp_sqrPre ## len(mcl::fp::Unit* y, const mcl::fp::Unit* x); \ void mcl_fpDbl_sqrPre ## len(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
void mcl_fp_mont ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p, mcl::fp::Unit r); \ void mcl_fp_mont ## len(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p, mcl::fp::Unit r); \
void mcl_fp_montRed ## len(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p, mcl::fp::Unit r); void mcl_fp_montRed ## len(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p, mcl::fp::Unit r);

@ -47,8 +47,8 @@ public:
/* /*
mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
*/ */
static inline void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fp_mulPre(xy.v_, x.v_, y.v_); } static inline void mulPre(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
static inline void mod(Fp& z, const FpDblT& xy) { Fp::op_.fp_mod(z.v_, xy.v_); } static inline void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_); }
}; };
/* /*

@ -106,7 +106,7 @@ struct Op {
Unit rp; Unit rp;
// z = montRed(xy) // z = montRed(xy)
void (*montRedPU)(Unit *z, const Unit *xy, const Unit *p, Unit rp); void (*montRedPU)(Unit *z, const Unit *xy, const Unit *p, Unit rp);
// z = mont(x, y) = montRed(fp_mulPre(x, y)) // z = mont(x, y) = montRed(fpDbl_mulPre(x, y))
void (*montPU)(Unit *z, const Unit *x, const Unit *y, const Unit *p, Unit rp); void (*montPU)(Unit *z, const Unit *x, const Unit *y, const Unit *p, Unit rp);
// require p // require p
@ -114,7 +114,7 @@ struct Op {
void2uOp fp_invOp; void2uOp fp_invOp;
void4u fp_addP; void4u fp_addP;
void4u fp_subP; void4u fp_subP;
void3u fp_modP; void3u fpDbl_modP;
FpGenerator *fg; FpGenerator *fg;
/* /*
@ -130,9 +130,9 @@ struct Op {
/* /*
FpDbl <=> Fp FpDbl <=> Fp
*/ */
void2u fp_sqrPre; void2u fpDbl_sqrPre;
void3u fp_mulPre; void3u fpDbl_mulPre;
void2u fp_mod; void2u fpDbl_mod;
/* /*
for Fp2 = F[u] / (u^2 + 1) for Fp2 = F[u] / (u^2 + 1)
@ -154,12 +154,12 @@ struct Op {
, isFullBit(true), fp_addNC(0), fp_subNC(0) , isFullBit(true), fp_addNC(0), fp_subNC(0)
, isMont(false), fp_preInv(0) , isMont(false), fp_preInv(0)
, rp(0), montRedPU(0), montPU(0) , rp(0), montRedPU(0), montPU(0)
, fp_negP(0), fp_invOp(0), fp_addP(0), fp_subP(0), fp_modP(0) , fp_negP(0), fp_invOp(0), fp_addP(0), fp_subP(0), fpDbl_modP(0)
, fg(createFpGenerator()) , fg(createFpGenerator())
, fpDbl_add(0), fpDbl_sub() , fpDbl_add(0), fpDbl_sub()
, fpDbl_addP(0), fpDbl_subP() , fpDbl_addP(0), fpDbl_subP()
, fpDbl_addNC(0), fpDbl_subNC() , fpDbl_addNC(0), fpDbl_subNC()
, fp_sqrPre(0), fp_mulPre(0), fp_mod(0) , fpDbl_sqrPre(0), fpDbl_mulPre(0), fpDbl_mod(0)
, xi_c(0) , xi_c(0)
, fp2_add(0), fp2_sub(0), fp2_mul(0), fp2_neg(0) , fp2_add(0), fp2_sub(0), fp2_mul(0), fp2_neg(0)
, fp2_sqr(0), fp2_mul_xi(0) , fp2_sqr(0), fp2_mul_xi(0)

@ -52,7 +52,7 @@ void benchRaw(const char *p, mcl::fp::Mode mode)
memcpy(uy, fy.getUnit(), sizeof(fy)); memcpy(uy, fy.getUnit(), sizeof(fy));
double fp_sqrT, fp_addT, fp_subT, fp_mulT; double fp_sqrT, fp_addT, fp_subT, fp_mulT;
double fpDbl_addT, fpDbl_subT; double fpDbl_addT, fpDbl_subT;
// double fp_sqrPreT, fp_mulPreT, fp_modT; // double fpDbl_sqrPreT, fpDbl_mulPreT, fpDbl_modT;
// double fp2_mulT, fp2_sqrT; // double fp2_mulT, fp2_sqrT;
// double fp_addNCT, fp_subNCT, fpDbl_addNCT,fpDbl_subNCT; // double fp_addNCT, fp_subNCT, fpDbl_addNCT,fpDbl_subNCT;
CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, ux, ux); CYBOZU_BENCH_T(fp_sqrT, op.fp_sqr, ux, ux);

@ -190,7 +190,7 @@ struct OpeFunc {
Gmp::getArray(z, N, mz); Gmp::getArray(z, N, mz);
} }
// z[N * 2] <- x[N] * y[N] // z[N * 2] <- x[N] * y[N]
static inline void fp_mulPreC(Unit *z, const Unit *x, const Unit *y) static inline void fpDbl_mulPreC(Unit *z, const Unit *x, const Unit *y)
{ {
mpz_t mx, my, mz; mpz_t mx, my, mz;
set_zero(mz, z, N * 2); set_zero(mz, z, N * 2);
@ -200,7 +200,7 @@ struct OpeFunc {
clearArray(z, mz->_mp_size, N * 2); clearArray(z, mz->_mp_size, N * 2);
} }
// y[N * 2] <- x[N]^2 // y[N * 2] <- x[N]^2
static inline void fp_sqrPreC(Unit *y, const Unit *x) static inline void fpDbl_sqrPreC(Unit *y, const Unit *x)
{ {
mpz_t mx, my; mpz_t mx, my;
set_zero(my, y, N * 2); set_zero(my, y, N * 2);
@ -209,7 +209,7 @@ struct OpeFunc {
clearArray(y, my->_mp_size, N * 2); clearArray(y, my->_mp_size, N * 2);
} }
// y[N] <- x[N * 2] mod p[N] // y[N] <- x[N * 2] mod p[N]
static inline void fp_modPC(Unit *y, const Unit *x, const Unit *p) static inline void fpDbl_modPC(Unit *y, const Unit *x, const Unit *p)
{ {
mpz_t mx, my, mp; mpz_t mx, my, mp;
set_mpz_t(mx, x, N * 2); set_mpz_t(mx, x, N * 2);
@ -258,9 +258,9 @@ struct OpeFunc {
fp_addNC = mcl_fp_addNC ## n; \ fp_addNC = mcl_fp_addNC ## n; \
fp_subNC = mcl_fp_subNC ## n; \ fp_subNC = mcl_fp_subNC ## n; \
} \ } \
fp_mulPre = mcl_fp_mulPre ## n; \ fpDbl_mulPre = mcl_fpDbl_mulPre ## n; \
if (n <= 256) { \ if (n <= 256) { \
fp_sqrPre = mcl_fp_sqrPre ## n; \ fpDbl_sqrPre = mcl_fpDbl_sqrPre ## n; \
} \ } \
montPU = mcl_fp_mont ## n; \ montPU = mcl_fp_mont ## n; \
montRedPU = mcl_fp_montRed ## n; \ montRedPU = mcl_fp_montRed ## n; \
@ -311,9 +311,9 @@ struct OpeFunc {
fpDbl_subNC = OpeFunc<n * 2>::fp_subNCC; \ fpDbl_subNC = OpeFunc<n * 2>::fp_subNCC; \
} \ } \
} \ } \
fp_mulPre = OpeFunc<n>::fp_mulPreC; \ fpDbl_mulPre = OpeFunc<n>::fpDbl_mulPreC; \
fp_sqrPre = OpeFunc<n>::fp_sqrPreC; \ fpDbl_sqrPre = OpeFunc<n>::fpDbl_sqrPreC; \
fp_modP = OpeFunc<n>::fp_modPC; \ fpDbl_modP = OpeFunc<n>::fpDbl_modPC; \
SET_OP_LLVM(n) SET_OP_LLVM(n)
#ifdef MCL_USE_XBYAK #ifdef MCL_USE_XBYAK

@ -18,7 +18,7 @@ noinline
@endfor @endfor
ret i$(bu) %t$(N-2) ret i$(bu) %t$(N-2)
} }
define void @mcl_fp_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) { define void @mcl_fpDbl_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) {
%x = load i$(bit)* %px %x = load i$(bit)* %px
%y = load i$(bit)* %py %y = load i$(bit)* %py
@for i, 0, N @for i, 0, N
@ -42,7 +42,7 @@ define void @mcl_fp_mulPre$(bit)(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) {
store i$(bu) %sum$(N-1), i$(bu)* %p store i$(bu) %sum$(N-1), i$(bu)* %p
ret void ret void
} }
define void @mcl_fp_sqrPre$(bit)(i$(unit)* %py, i$(bit)* %px) { define void @mcl_fpDbl_sqrPre$(bit)(i$(unit)* %py, i$(bit)* %px) {
%x = load i$(bit)* %px %x = load i$(bit)* %px
@for i, 0, N @for i, 0, N
%x$(i) = call i$(unit) @extract$(bit)(i$(bit) %x, i$(bit) $(unit*i)) %x$(i) = call i$(unit) @extract$(bit)(i$(bit) %x, i$(bit) $(unit*i))

@ -30,7 +30,7 @@ define internal i64 @extract192to64(i192 %x, i192 %shift) {
ret i64 %t1 ret i64 %t1
} }
define internal void @modNIST_P192(i192* %out, i192* %px) { define internal void @fpDbl_modNIST_P192(i192* %out, i192* %px) {
%L192 = load i192* %px %L192 = load i192* %px
%L = zext i192 %L192 to i256 %L = zext i192 %L192 to i256
@ -68,7 +68,7 @@ define internal void @modNIST_P192(i192* %out, i192* %px) {
define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) { define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) {
%buf = alloca i192, i32 2 %buf = alloca i192, i32 2
%p = bitcast i192* %buf to i$(unit)* %p = bitcast i192* %buf to i$(unit)*
call void @mcl_fp_mulPre192(i$(unit)* %p, i192* %px, i192* %py) call void @mcl_fpDbl_mulPre192(i$(unit)* %p, i192* %px, i192* %py)
call void @modNIST_P192(i192* %pz, i192* %buf) call void @fpDbl_modNIST_P192(i192* %pz, i192* %buf)
ret void ret void
} }

Loading…
Cancel
Save