add gmp version of Mont with not full prime

dev
MITSUNARI Shigeo 8 years ago
parent 21324c9e0a
commit 389bb658ef
  1. 4
      readme.md
  2. 9
      src/fp.cpp
  3. 91
      src/gen.cpp
  4. 78
      src/low_func.hpp
  5. 3
      src/low_func_llvm.hpp
  6. 1
      src/proto.hpp

@ -89,7 +89,9 @@ make MCL_USE_LLVM=1 LLVM_VER=-3.8 ARCH=arm
A benchmark of a BN curve over the 254-bit prime.
* x64, x86 ; Intel Core i7-6700 3.4GHz(Skylake)
* `sudo cpufreq-set -g performance`
```
sudo cpufreq-set -g performance
```
* arm ; 900MHz quad-core ARM Cortex-A7 on Raspberry Pi2, Linux 4.4.11-v7+
* arm64 ; 1.2GHz ARM Cortex-A53 [HiKey](http://www.96boards.org/product/hikey/)

@ -191,8 +191,13 @@ void setOp2(Op& op)
op.fp_sub = Sub<N, false, Tag>::f;
}
if (op.isMont) {
op.fp_mul = Mont<N, Tag>::f;
op.fp_sqr = SqrMont<N, Tag>::f;
if (op.isFullBit) {
op.fp_mul = Mont<N, true, Tag>::f;
op.fp_sqr = SqrMont<N, true, Tag>::f;
} else {
op.fp_mul = Mont<N, false, Tag>::f;
op.fp_sqr = SqrMont<N, false, Tag>::f;
}
op.fpDbl_mod = MontRed<N, Tag>::f;
} else {
op.fp_mul = Mul<N, Tag>::f;

@ -738,7 +738,7 @@ struct Code : public mcl::Generator {
generic_fpDbl_mul(py, px, px);
endFunc();
}
void gen_mcl_fp_mont()
void gen_mcl_fp_mont(bool isFullBit = true)
{
const int bu = bit + unit;
const int bu2 = bit + unit * 2;
@ -747,39 +747,75 @@ struct Code : public mcl::Generator {
Operand px(IntPtr, unit);
Operand py(IntPtr, unit);
Operand pp(IntPtr, unit);
std::string name = "mcl_fp_mont" + cybozu::itoa(N) + "L";
std::string name = "mcl_fp_mont";
if (!isFullBit) {
name += "NF";
}
name += cybozu::itoa(N) + "L";
mcl_fp_montM[N] = Function(name, Void, pz, px, py, pp);
mcl_fp_montM[N].setAlias();
verifyAndSetPrivate(mcl_fp_montM[N]);
beginFunc(mcl_fp_montM[N]);
Operand rp = load(getelementptr(pp, -1));
Operand p = loadN(pp, N);
Operand z, s, a;
for (uint32_t i = 0; i < N; i++) {
Operand y = load(getelementptr(py, i));
Operand xy = call(mulPvM[bit], px, y);
Operand at;
if (i == 0) {
a = zext(xy, bu2);
at = trunc(xy, unit);
} else {
xy = zext(xy, bu2);
a = add(s, xy);
at = trunc(a, unit);
if (1 || isFullBit) {
for (uint32_t i = 0; i < N; i++) {
Operand y = load(getelementptr(py, i));
Operand xy = call(mulPvM[bit], px, y);
Operand at;
if (i == 0) {
a = zext(xy, bu2);
at = trunc(xy, unit);
} else {
xy = zext(xy, bu2);
a = add(s, xy);
at = trunc(a, unit);
}
Operand q = mul(at, rp);
Operand pq = call(mulPvM[bit], pp, q);
pq = zext(pq, bu2);
Operand t = add(a, pq);
s = lshr(t, unit);
}
Operand q = mul(at, rp);
Operand pq = call(mulPvM[bit], pp, q);
pq = zext(pq, bu2);
Operand t = add(a, pq);
s = lshr(t, unit);
s = trunc(s, bu);
Operand p = zext(loadN(pp, N), bu);
Operand vc = sub(s, p);
Operand c = trunc(lshr(vc, bit), 1);
z = select(c, s, vc);
z = trunc(z, bit);
storeN(z, pz);
} else {
for (uint32_t i = 0; i < N; i++) {
Operand y = load(getelementptr(py, i));
Operand xy = call(mulPvM[bit], px, y);
Operand at;
if (i == 0) {
a = xy;
at = trunc(xy, unit);
Operand q = mul(at, rp);
Operand pq = call(mulPvM[bit], pp, q);
pq = zext(pq, bu2);
Operand t = add(a, pq);
s = lshr(t, unit);
} else {
xy = zext(xy, bu2);
a = add(s, xy);
at = trunc(a, unit);
Operand q = mul(at, rp);
Operand pq = call(mulPvM[bit], pp, q);
pq = zext(pq, bu2);
Operand t = add(a, pq);
s = lshr(t, unit);
}
}
s = trunc(s, bu);
Operand p = zext(loadN(pp, N), bu);
Operand vc = sub(s, p);
Operand c = trunc(lshr(vc, bit), 1);
z = select(c, s, vc);
z = trunc(z, bit);
storeN(z, pz);
}
s = trunc(s, bu);
p = zext(p, bu);
Operand vc = sub(s, p);
Operand c = trunc(lshr(vc, bit), 1);
z = select(c, s, vc);
z = trunc(z, bit);
storeN(z, pz);
ret(Void);
endFunc();
}
@ -840,7 +876,8 @@ struct Code : public mcl::Generator {
gen_mcl_fp_mulUnitPre();
gen_mcl_fpDbl_mulPre();
gen_mcl_fpDbl_sqrPre();
gen_mcl_fp_mont();
gen_mcl_fp_mont(true);
gen_mcl_fp_mont(false);
gen_mcl_fp_montRed();
}
void setBit(uint32_t bit)

@ -507,7 +507,7 @@ const void3u MontRed<N, Tag>::f = MontRed<N, Tag>::func;
z[N] <- Montgomery(x[N], y[N], p[N])
REMARK : assume p[-1] = rp
*/
template<size_t N, class Tag = Gtag>
template<size_t N, bool isFullBit, class Tag = Gtag>
struct Mont {
static inline void func(Unit *z, const Unit *x, const Unit *y, const Unit *p)
{
@ -517,26 +517,56 @@ struct Mont {
MontRed<N, Tag>::f(z, xy, p);
#else
const Unit rp = p[-1];
Unit buf[N * 2 + 2];
Unit *c = buf;
MulUnitPre<N, Tag>::f(c, x, y[0]); // x * y[0]
Unit q = c[0] * rp;
Unit t[N + 2];
MulUnitPre<N, Tag>::f(t, p, q); // p * q
t[N + 1] = 0; // always zero
c[N + 1] = AddPre<N + 1, Tag>::f(c, c, t);
c++;
for (size_t i = 1; i < N; i++) {
MulUnitPre<N, Tag>::f(t, x, y[i]);
if (isFullBit) {
Unit buf[N * 2 + 2];
Unit *c = buf;
MulUnitPre<N, Tag>::f(c, x, y[0]); // x * y[0]
Unit q = c[0] * rp;
Unit t[N + 2];
MulUnitPre<N, Tag>::f(t, p, q); // p * q
t[N + 1] = 0; // always zero
c[N + 1] = AddPre<N + 1, Tag>::f(c, c, t);
q = c[0] * rp;
MulUnitPre<N, Tag>::f(t, p, q);
AddPre<N + 2, Tag>::f(c, c, t);
c++;
}
if (c[N]) {
SubPre<N, Tag>::f(z, c, p);
for (size_t i = 1; i < N; i++) {
MulUnitPre<N, Tag>::f(t, x, y[i]);
c[N + 1] = AddPre<N + 1, Tag>::f(c, c, t);
q = c[0] * rp;
MulUnitPre<N, Tag>::f(t, p, q);
AddPre<N + 2, Tag>::f(c, c, t);
c++;
}
if (c[N]) {
SubPre<N, Tag>::f(z, c, p);
} else {
if (SubPre<N, Tag>::f(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
}
} else {
Unit carry;
(void)carry;
Unit buf[N * 2 + 1];
Unit *c = buf;
MulUnitPre<N, Tag>::f(c, x, y[0]); // x * y[0]
Unit q = c[0] * rp;
Unit t[N + 1];
MulUnitPre<N, Tag>::f(t, p, q); // p * q
carry = AddPre<N + 1, Tag>::f(c, c, t);
assert(carry == 0);
c++;
c[N] = 0;
for (size_t i = 1; i < N; i++) {
c[N + 1] = 0;
MulUnitPre<N, Tag>::f(t, x, y[i]);
carry = AddPre<N + 1, Tag>::f(c, c, t);
assert(carry == 0);
q = c[0] * rp;
MulUnitPre<N, Tag>::f(t, p, q);
carry = AddPre<N + 1, Tag>::f(c, c, t);
assert(carry == 0);
c++;
}
assert(c[N] == 0);
if (SubPre<N, Tag>::f(z, c, p)) {
memcpy(z, c, N * sizeof(Unit));
}
@ -546,11 +576,11 @@ struct Mont {
static const void4u f;
};
template<size_t N, class Tag>
const void4u Mont<N, Tag>::f = Mont<N, Tag>::func;
template<size_t N, bool isFullBit, class Tag>
const void4u Mont<N, isFullBit, Tag>::f = Mont<N, isFullBit, Tag>::func;
// z[N] <- Montgomery(x[N], x[N], p[N])
template<size_t N, class Tag = Gtag>
template<size_t N, bool isFullBit, class Tag = Gtag>
struct SqrMont {
static inline void func(Unit *y, const Unit *x, const Unit *p)
{
@ -559,13 +589,13 @@ struct SqrMont {
SqrPre<N, Tag>::f(xx, x);
MontRed<N, Tag>::f(y, xx, p);
#else
Mont<N, Tag>::f(y, x, x, p);
Mont<N, isFullBit, Tag>::f(y, x, x, p);
#endif
}
static const void3u f;
};
template<size_t N, class Tag>
const void3u SqrMont<N, Tag>::f = SqrMont<N, Tag>::func;
template<size_t N, bool isFullBit, class Tag>
const void3u SqrMont<N, isFullBit, Tag>::f = SqrMont<N, isFullBit, Tag>::func;
// z[N] <- (x[N] * y[N]) % p[N]
template<size_t N, class Tag = Gtag>

@ -35,7 +35,8 @@ template<>const void4u Add<n, true, Ltag>::f = &mcl_fp_add ## n ## L; \
template<>const void4u Add<n, false, Ltag>::f = &mcl_fp_addNF ## n ## L; \
template<>const void4u Sub<n, true, Ltag>::f = &mcl_fp_sub ## n ## L; \
template<>const void4u Sub<n, false, Ltag>::f = &mcl_fp_subNF ## n ## L; \
template<>const void4u Mont<n, Ltag>::f = &mcl_fp_mont ## n ## L; \
template<>const void4u Mont<n, true, Ltag>::f = &mcl_fp_mont ## n ## L; \
template<>const void4u Mont<n, false, Ltag>::f = &mcl_fp_mont ## n ## L; \
template<>const void3u MontRed<n, Ltag>::f = &mcl_fp_montRed ## n ## L; \
template<>const void4u DblAdd<n, Ltag>::f = &mcl_fpDbl_add ## n ## L; \
template<>const void4u DblSub<n, Ltag>::f = &mcl_fpDbl_sub ## n ## L; \

@ -20,6 +20,7 @@ void mcl_fp_mulUnitPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, mcl
void mcl_fpDbl_mulPre ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y); \
void mcl_fpDbl_sqrPre ## n ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
void mcl_fp_mont ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_montNF ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_montRed ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p); \
void mcl_fpDbl_add ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fpDbl_sub ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p);

Loading…
Cancel
Save