op.add requires p

dev
MITSUNARI Shigeo 8 years ago
parent 0e9f97b6db
commit 20c35f1296
  1. 7
      Makefile
  2. 6
      include/mcl/fp.hpp
  3. 30
      include/mcl/fp_tower.hpp
  4. 2
      include/mcl/op.hpp
  5. 15
      src/fp.cpp
  6. 6
      src/fp_generator.hpp
  7. 7
      test/fp_generator_test.cpp
  8. 2
      test/fp_test.cpp

@ -6,7 +6,10 @@ EXE_DIR=bin
SRC_SRC=fp.cpp
TEST_SRC=fp_test.cpp ec_test.cpp fp_util_test.cpp window_method_test.cpp elgamal_test.cpp fp_tower_test.cpp gmp_test.cpp bn_test.cpp
ifeq ($(CPU),x86-64)
TEST_SRC+=fp_generator_test.cpp mont_fp_test.cpp sq_test.cpp low_test.cpp
TEST_SRC+=fp_generator_test.cpp mont_fp_test.cpp sq_test.cpp
ifeq ($(USE_LOW_ASM),1)
TEST_SRC+=low_test.cpp
endif
endif
SAMPLE_SRC=bench.cpp ecdh.cpp random.cpp rawbench.cpp vote.cpp pairing.cpp large.cpp
@ -107,6 +110,8 @@ ALL_SRC=$(SRC_SRC) $(TEST_SRC) $(SAMPLE_SRC)
DEPEND_FILE=$(addprefix $(OBJ_DIR)/, $(ALL_SRC:.cpp=.d))
-include $(DEPEND_FILE)
.PHONY: test
# don't remove these files automatically
.SECONDARY: $(addprefix $(OBJ_DIR)/, $(ALL_SRC:.cpp=.o))

@ -131,7 +131,7 @@ public:
op_.clear();
op_.fp_neg = fp_negW;
op_.fp_sqr = fp_sqrW;
op_.fp_add = fp_addW;
op_.fp_add = 0;
op_.fp_sub = fp_subW;
op_.fpDbl_add = fpDbl_addW;
op_.fpDbl_sub = fpDbl_subW;
@ -365,7 +365,7 @@ public:
if (x < 0) throw cybozu::Exception("Fp:setMpz:negative is not supported") << x;
setArray(gmp::getUnit(x), gmp::getUnitSize(x));
}
static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_); }
static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.fp_add(z.v_, x.v_, y.v_, op_.p); }
static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.fp_sub(z.v_, x.v_, y.v_); }
static inline void addNC(FpT& z, const FpT& x, const FpT& y) { op_.fp_addNC(z.v_, x.v_, y.v_); }
static inline void subNC(FpT& z, const FpT& x, const FpT& y) { op_.fp_subNC(z.v_, x.v_, y.v_); }
@ -480,10 +480,12 @@ private:
case 2: fp_addW(z.v_, x.v_, y.v_)
op_.fp_addP(z, x, y, p) written by GMP/LLVM with generic p
*/
#if 0
// Three-argument adapter for addition: forwards z, x, y to op_.fp_addP and
// supplies op_.p as the fourth argument.
// NOTE: the surrounding #if 0 / #endif currently compiles this function out.
static inline void fp_addW(Unit *z, const Unit *x, const Unit *y)
{
op_.fp_addP(z, x, y, op_.p);
}
#endif
static inline void fp_subW(Unit *z, const Unit *x, const Unit *y)
{
op_.fp_subP(z, x, y, op_.p);

@ -162,7 +162,13 @@ public:
mcl::fp::Op& op = Fp::op_;
op.fp2_add = fp2_addW;
op.fp2_sub = fp2_subW;
op.fp2_mul = op.isFastMod ? fp2_mulW : fp2_mulUseDblW;
if (op.isFastMod) {
op.fp2_mul = fp2_mulW;
} else if (!op.isFullBit) {
op.fp2_mul = fp2_mulUseDblUseNCW;
} else {
op.fp2_mul = fp2_mulUseDblW;
}
op.fp2_neg = fp2_negW;
op.fp2_inv = fp2_invW;
op.fp2_sqr = fp2_sqrW;
@ -230,6 +236,28 @@ private:
@note mod of NIST_P192 is fast
*/
/*
	Product of two pairs of Fp values, x = (a, b) and y = (c, d), using
	double-width intermediates:
	  z[0] = ac - bd
	  z[1] = (a + b)(c + d) - ac - bd  (= ad + bc)
	Three FpDbl::mulPre calls are made, and each output component is reduced
	by one FpDbl::mod call.
	Every read of x and y is finished before the first store to z.
*/
static void fp2_mulUseDblW(Unit *z, const Unit *x, const Unit *y)
{
	const Fp *xs = reinterpret_cast<const Fp*>(x);
	const Fp *ys = reinterpret_cast<const Fp*>(y);
	Fp *zs = reinterpret_cast<Fp*>(z);

	Fp sumX;
	Fp sumY;
	Fp::add(sumX, xs[0], xs[1]); // a + b
	Fp::add(sumY, ys[0], ys[1]); // c + d

	FpDbl cross;
	FpDbl ac;
	FpDbl bd;
	FpDbl::mulPre(cross, sumX, sumY); // (a + b)(c + d)
	FpDbl::mulPre(ac, xs[0], ys[0]);  // ac
	FpDbl::mulPre(bd, xs[1], ys[1]);  // bd

	// second component: strip ac and bd from the cross product, then reduce
	FpDbl::sub(cross, cross, ac);
	FpDbl::sub(cross, cross, bd);
	FpDbl::mod(zs[1], cross);

	// first component: ac - bd, then reduce
	FpDbl::sub(ac, ac, bd);
	FpDbl::mod(zs[0], ac);
}
static void fp2_mulUseDblUseNCW(Unit *z, const Unit *x, const Unit *y)
{
const Fp *px = reinterpret_cast<const Fp*>(x);
const Fp *py = reinterpret_cast<const Fp*>(y);

@ -87,7 +87,7 @@ struct Op {
// not require p(function having p)
void2u fp_neg;
void2u fp_sqr;
void3u fp_add;
void4u fp_add;
void3u fp_sub;
void3u fp_mul;
void2uI fp_mul_UnitPre; // z[N + 1] = x[N] * y

@ -295,7 +295,7 @@ struct OpeFunc {
#ifdef MCL_USE_LLVM
#define SET_OP_LLVM(n) \
if (mode == FP_LLVM || mode == FP_LLVM_MONT) { \
fp_addP = mcl_fp_add ## n; \
fp_add = mcl_fp_add ## n; \
fp_subP = mcl_fp_sub ## n; \
if (!isFullBit) { \
fp_addNC = mcl_fp_addNC ## n; \
@ -332,15 +332,15 @@ struct OpeFunc {
} else { \
fp_invOp = OpeFunc<n>::fp_invOpC; \
} \
fp_addP = OpeFunc<n>::fp_addPC; \
fp_add = OpeFunc<n>::fp_addPC; \
fp_subP = OpeFunc<n>::fp_subPC; \
fpDbl_addP = OpeFunc<n>::fpDbl_addPC; \
fpDbl_subP = OpeFunc<n>::fpDbl_subPC; \
if (isFullBit) { \
fp_addNC = fp_add; \
fp_subNC = fp_sub; \
fpDbl_addNC = fpDbl_add; \
fpDbl_subNC = fpDbl_sub; \
fp_addNC = 0; \
fp_subNC = 0; \
fpDbl_addNC = 0; \
fpDbl_subNC = 0; \
} else { \
fp_addNC = OpeFunc<n>::fp_addNCC; \
fp_subNC = OpeFunc<n>::fp_subNCC; \
@ -373,6 +373,7 @@ inline void invOpForMontC(Unit *y, const Unit *x, const Op& op)
static void initInvTbl(Op& op)
{
const size_t N = op.N;
const Unit *p = op.p;
const size_t invTblN = N * sizeof(Unit) * 8 * 2;
op.invTbl.resize(invTblN * N);
Unit *tbl = op.invTbl.data() + (invTblN - 1) * N;
@ -380,7 +381,7 @@ static void initInvTbl(Op& op)
t[0] = 2;
op.toMont(tbl, t);
for (size_t i = 0; i < invTblN - 1; i++) {
op.fp_add(tbl - N, tbl, tbl);
op.fp_add(tbl - N, tbl, tbl, p);
tbl -= N;
}
}

@ -193,15 +193,15 @@ struct FpGenerator : Xbyak::CodeGenerator {
setSize(0); // reset code
align(16);
op.fp_add = getCurr<void3u>();
op.fp_add = getCurr<void4u>();
gen_fp_add();
align(16);
op.fp_sub = getCurr<void3u>();
gen_fp_sub();
if (op.isFullBit) {
op.fp_addNC = op.fp_add;
op.fp_subNC = op.fp_sub;
op.fp_addNC = 0;
op.fp_subNC = 0;
} else {
align(16);
op.fp_addNC = getCurr<void3u>();

@ -96,6 +96,7 @@ static inline std::ostream& operator<<(std::ostream& os, const Int& x)
void testAddSub(const mcl::fp::Op& op)
{
Fp x, y;
const uint64_t *p = op.p;
Int mx(op.N), my(op.N);
x.setStr("0x8811aabb23427cc");
y.setStr("0x8811aabb23427cc11");
@ -104,12 +105,12 @@ void testAddSub(const mcl::fp::Op& op)
for (int i = 0; i < 30; i++) {
CYBOZU_TEST_EQUAL(mx, x);
x += x;
op.fp_add(mx.v, mx.v, mx.v);
op.fp_add(mx.v, mx.v, mx.v, p);
}
for (int i = 0; i < 30; i++) {
CYBOZU_TEST_EQUAL(mx, x);
x += y;
op.fp_add(mx.v, mx.v, my.v);
op.fp_add(mx.v, mx.v, my.v, p);
}
for (int i = 0; i < 30; i++) {
CYBOZU_TEST_EQUAL(my, y);
@ -140,7 +141,7 @@ void testNeg(const mcl::fp::Op& op)
void testMulI(const mcl::fp::FpGenerator& fg, int pn)
{
cybozu::XorShift rg;
printf("pn=%d, %p\n", pn, fg.mul_Unit_);
//printf("pn=%d, %p\n", pn, fg.mul_Unit_);
for (int i = 0; i < 100; i++) {
uint64_t x[MAX_N];
uint64_t z[MAX_N + 1];

@ -381,7 +381,7 @@ void opeTest()
z *= y;
CYBOZU_TEST_EQUAL(z, tbl[i].x);
}
{
if (!Fp::isFullBit()) {
Fp x(5), y(3), z;
Fp::addNC(z, x, y);
if (Fp::compareRaw(z, Fp::getP()) >= 0) {

Loading…
Cancel
Save