Merge branch 'dev'

update-fork
MITSUNARI Shigeo 4 years ago
commit 2fc942b06e
  1. 2
      .github/workflows/main.yml
  2. 6
      Makefile
  3. 14
      ffi/java/Makefile
  4. 2
      ffi/java/com/herumi/mcl/CipherText.java
  5. 2
      ffi/java/com/herumi/mcl/Elgamal.java
  6. 2
      ffi/java/com/herumi/mcl/ElgamalJNI.java
  7. 2
      ffi/java/com/herumi/mcl/PrivateKey.java
  8. 2
      ffi/java/com/herumi/mcl/PublicKey.java
  9. 2
      ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
  10. 6
      ffi/java/elgamal_wrap.cxx
  11. 2
      ffi/java/mcl_wrap.cxx
  12. 2
      include/mcl/gmp_util.hpp
  13. 1
      readme.md
  14. 18288
      src/asm/aarch64.s
  15. 103811
      src/asm/arm.s
  16. 19587
      src/asm/x86-64.bmi2.s
  17. 22405
      src/asm/x86-64.s
  18. 18465
      src/asm/x86-64mac.bmi2.s
  19. 21489
      src/asm/x86-64mac.s
  20. 93322
      src/asm/x86.bmi2.s
  21. 96675
      src/asm/x86.s
  22. 68204
      src/base32.ll
  23. 19538
      src/base64.ll
  24. 43
      src/fp.cpp
  25. 96
      src/gen.cpp
  26. 16
      src/low_func.hpp
  27. 44
      src/low_func_llvm.hpp
  28. 38
      src/proto.hpp
  29. 4
      test/fp_generator_test.cpp
  30. 28
      test/fp_test.cpp
  31. 10
      test/fp_tower_test.cpp
  32. 6
      test/glv_test.cpp
  33. 130
      test/llvm_test.cpp
  34. 10
      test/mont_fp_test.cpp

@ -16,6 +16,6 @@ jobs:
- run: make clean
- run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail
- run: make clean
- run: make test_go
# - run: make test_go
- run: sudo apt install openjdk-8-jdk
- run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include

@ -358,6 +358,12 @@ bin/emu:
bin/pairing_c_min.exe: sample/pairing_c.c include/mcl/vint.hpp src/fp.cpp include/mcl/bn.hpp
$(CXX) -std=c++03 -O3 -g -fno-threadsafe-statics -fno-exceptions -fno-rtti -o $@ sample/pairing_c.c src/fp.cpp src/bn_c384_256.cpp -I./include -DXBYAK_NO_EXCEPTION -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384 -DMCL_VINT_64BIT_PORTABLE -DCYBOZU_DONT_USE_STRING -DCYBOZU_DONT_USE_EXCEPTION -DNDEBUG # -DMCL_DONT_USE_CSPRNG
bin/llvm_test64.exe: test/llvm_test.cpp src/base64.ll
clang++$(LLVM_VER) -o $@ -Ofast -DNDEBUG -Wall -Wextra -I ./include test/llvm_test.cpp src/base64.ll
bin/llvm_test32.exe: test/llvm_test.cpp src/base32.ll
clang++$(LLVM_VER) -o $@ -Ofast -DNDEBUG -Wall -Wextra -I ./include test/llvm_test.cpp src/base32.ll -m32
make_tbl:
$(MAKE) ../bls/src/qcoeff-bn254.hpp

@ -1,14 +1,18 @@
TOP_DIR=../..
include $(TOP_DIR)/common.mk
ifeq ($(UNAME_S),Darwin)
JAVA_INC?=-I/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/JavaVM.framework/Headers/
JAVA_INC_DIR?=/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/JavaVM.framework/Headers/
JAVA_INC?=-I$(JAVA_INC_DIR)
CFLAGS+=$(JAVA_INC)/darwin
else
JAVA_INC?=-I/usr/lib/jvm/default-java/include
JAVA_INC_DIR?=/usr/lib/jvm/default-java/include
JAVA_INC?=-I$(JAVA_INC_DIR)
#JAVA_INC=-I/usr/lib/jvm/java-7-openjdk-amd64/include
CFLAGS+=$(JAVA_INC)/linux
CFLAGS+=-z noexecstack
LDFLAGS+=-lrt
endif
CFLAGS+=$(JAVA_INC) $(JAVA_INC)/linux -I $(TOP_DIR)/include -I $(TOP_DIR)/../xbyak -I $(TOP_DIR)/../cybozulib/include -Wno-strict-aliasing
# Common include paths; no bare -I may precede -Wno-strict-aliasing, or the flag is consumed as a directory name.
CFLAGS+=$(JAVA_INC) -I $(TOP_DIR)/include -Wno-strict-aliasing
MCL_LIB=$(TOP_DIR)/lib/libmcl.a
PACKAGE_NAME=com.herumi.mcl
@ -28,7 +32,7 @@ mcl_wrap.cxx: mcl.i mcl_impl.hpp
swig -java -package $(PACKAGE_NAME) -outdir $(PACKAGE_DIR) -c++ -Wall mcl.i
$(MCL_LIB):
make -C $(TOP_DIR)
make -C $(TOP_DIR) lib/libmcl.a
$(ELGAMAL_LIB): elgamal_wrap.cxx $(MCL_LIB)
$(PRE)$(CXX) $< -o $@ $(CFLAGS) $(MCL_LIB) $(LDFLAGS) -shared
@ -51,7 +55,7 @@ test_elgamal: ElgamalTest.class $(ELGAMAL_LIB)
$(JAVA_EXE) ElgamalTest -e NIST_P256 -h sha256
$(JAVA_EXE) ElgamalTest -e secp256k1 -h sha256
$(JAVA_EXE) ElgamalTest -e NIST_P384 -h sha384
$(JAVA_EXE) ElgamalTest -e NIST_P521 -h sha512
# $(JAVA_EXE) ElgamalTest -e NIST_P521 -h sha512
test_mcl: MclTest.class $(MCLJAVA_LIB)
$(JAVA_EXE) MclTest

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0
* Version 4.0.2
*
* Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0
* Version 4.0.2
*
* Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0
* Version 4.0.2
*
* Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0
* Version 4.0.2
*
* Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0
* Version 4.0.2
*
* Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0
* Version 4.0.2
*
* Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0
* Version 4.0.2
*
* This file is not intended to be easily readable and contains a number of
* coding conventions designed to improve portability and efficiency. Do not make
@ -190,7 +190,8 @@ typedef enum {
SWIG_JavaIllegalArgumentException,
SWIG_JavaNullPointerException,
SWIG_JavaDirectorPureVirtual,
SWIG_JavaUnknownError
SWIG_JavaUnknownError,
SWIG_JavaIllegalStateException,
} SWIG_JavaExceptionCodes;
typedef struct {
@ -211,6 +212,7 @@ static void SWIGUNUSED SWIG_JavaThrowException(JNIEnv *jenv, SWIG_JavaExceptionC
{ SWIG_JavaNullPointerException, "java/lang/NullPointerException" },
{ SWIG_JavaDirectorPureVirtual, "java/lang/RuntimeException" },
{ SWIG_JavaUnknownError, "java/lang/UnknownError" },
{ SWIG_JavaIllegalStateException, "java/lang/IllegalStateException" },
{ (SWIG_JavaExceptionCodes)0, "java/lang/UnknownError" }
};
const SWIG_JavaExceptions_t *except_ptr = java_exceptions;

@ -1623,6 +1623,7 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isZero(JNIEnv *jenv,
return jresult;
}
SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isValidOrder(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
jboolean jresult = 0 ;
G1 *arg1 = (G1 *) 0 ;
@ -1637,6 +1638,7 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isValidOrder(JNIEnv *
return jresult;
}
SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
G1 *arg1 = (G1 *) 0 ;
Fp *arg2 = 0 ;

@ -28,7 +28,7 @@
#define MCL_USE_VINT
#endif
#ifndef MCL_MAX_BIT_SIZE
#define MCL_MAX_BIT_SIZE 521
#define MCL_MAX_BIT_SIZE 512
#endif
#ifdef MCL_USE_VINT
#include <mcl/vint.hpp>

@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
# News
- set default `MCL_MAX_BIT_SIZE=512`, which disables support for `NIST_P521`.
- improve performance
- support M1 mac
- dst for mapToG1 has changed to `BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_`.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -309,11 +309,12 @@ void setOp2(Op& op)
if (op.isFullBit) {
op.fp_mul = Mont<N, true, Tag>::f;
op.fp_sqr = SqrMont<N, true, Tag>::f;
op.fpDbl_mod = MontRed<N, true, Tag>::f;
} else {
op.fp_mul = Mont<N, false, Tag>::f;
op.fp_sqr = SqrMont<N, false, Tag>::f;
op.fpDbl_mod = MontRed<N, false, Tag>::f;
}
op.fpDbl_mod = MontRed<N, Tag>::f;
} else {
op.fp_mul = Mul<N, Tag>::f;
op.fp_sqr = Sqr<N, Tag>::f;
@ -568,38 +569,16 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
}
#endif
switch (N) {
case 1: setOp<1>(*this, mode); break;
case 2: setOp<2>(*this, mode); break;
case 3: setOp<3>(*this, mode); break;
case 4: setOp<4>(*this, mode); break; // 256 if 64-bit
#if MCL_MAX_UNIT_SIZE >= 6
case 5: setOp<5>(*this, mode); break;
case 6: setOp<6>(*this, mode); break;
case 192/CYBOZU_OS_BIT: setOp<192/CYBOZU_OS_BIT>(*this, mode); break;
#if CYBOZU_OS_BIT == 32
case 224/CYBOZU_OS_BIT: setOp<224/CYBOZU_OS_BIT>(*this, mode); break;
#endif
#if MCL_MAX_UNIT_SIZE >= 8
case 7: setOp<7>(*this, mode); break;
case 8: setOp<8>(*this, mode); break;
case 256/CYBOZU_OS_BIT: setOp<256/CYBOZU_OS_BIT>(*this, mode); break;
#if MCL_MAX_BIT_SIZE >= 384
case 384/CYBOZU_OS_BIT: setOp<384/CYBOZU_OS_BIT>(*this, mode); break;
#endif
#if MCL_MAX_UNIT_SIZE >= 9
case 9: setOp<9>(*this, mode); break; // 521 if 64-bit
#endif
#if MCL_MAX_UNIT_SIZE >= 10
case 10: setOp<10>(*this, mode); break;
#endif
#if MCL_MAX_UNIT_SIZE >= 12
case 11: setOp<11>(*this, mode); break;
case 12: setOp<12>(*this, mode); break; // 768 if 64-bit
#endif
#if MCL_MAX_UNIT_SIZE >= 14
case 13: setOp<13>(*this, mode); break;
case 14: setOp<14>(*this, mode); break;
#endif
#if MCL_MAX_UNIT_SIZE >= 16
case 15: setOp<15>(*this, mode); break;
case 16: setOp<16>(*this, mode); break; // 1024 if 64-bit
#endif
#if MCL_MAX_UNIT_SIZE >= 17
case 17: setOp<17>(*this, mode); break; // 521 if 32-bit
#if MCL_MAX_BIT_SIZE >= 512
case 512/CYBOZU_OS_BIT: setOp<512/CYBOZU_OS_BIT>(*this, mode); break;
#endif
default:
return false;
@ -617,10 +596,12 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
fp_sqr = &mcl_fp_sqr_NIST_P192L;
fpDbl_mod = &mcl_fpDbl_mod_NIST_P192L;
}
#if MCL_MAX_BIT_SIZE >= 521
if (primeMode == PM_NIST_P521) {
fpDbl_mod = &mcl_fpDbl_mod_NIST_P521L;
}
#endif
#endif
#if defined(MCL_USE_VINT) && MCL_SIZEOF_UNIT == 8
if (primeMode == PM_SECP256K1) {
fp_mul = &mcl::vint::mcl_fp_mul_SECP256K1;

@ -669,7 +669,7 @@ struct Code : public mcl::Generator {
Operand z(Int, bu);
Operand px(IntPtr, unit);
Operand y(Int, unit);
std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit);
std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit) + suf;
mulPvM[bit] = Function(name, z, px, y);
// workaround at https://github.com/herumi/mcl/pull/82
// mulPvM[bit].setPrivate();
@ -715,11 +715,12 @@ struct Code : public mcl::Generator {
Operand z = mul(x, y);
storeN(z, pz);
ret(Void);
} else if (N >= 8 && (N % 2) == 0) {
} else if (N > 8 && (N % 2) == 0) {
/*
W = 1 << half
(aW + b)(cW + d) = acW^2 + (ad + bc)W + bd
ad + bc = (a + b)(c + d) - ac - bd
@note Karatsuba is slower for N = 8
*/
const int H = N / 2;
const int half = bit / 2;
@ -883,37 +884,79 @@ struct Code : public mcl::Generator {
ret(Void);
endFunc();
}
void gen_mcl_fp_montRed()
// Concatenate two operands into one of width H.bit + L.bit,
// with H in the upper bits and L in the lower bits: returns [H:L].
Operand pack(Operand H, Operand L)
{
	const int total = H.bit + L.bit;
	// widen and shift the high part first, then widen the low part (same emission order as before)
	Operand hi = zext(H, total);
	hi = shl(hi, L.bit);
	Operand lo = zext(L, total);
	return _or(hi, lo);
}
// Split x into [ret:L]: *L receives the low sizeL bits of x,
// and the remaining upper bits are returned.
Operand split(Operand *L, const Operand& x, int sizeL)
{
	Operand upper = lshr(x, sizeL);
	const int upperBit = upper.bit - sizeL;
	upper = trunc(upper, upperBit);
	// write *L last: x must not be read after this store
	*L = trunc(x, sizeL);
	return upper;
}
void gen_mcl_fp_montRed(bool isFullBit = true)
{
const int bu = bit + unit;
const int b2 = bit * 2;
const int b2u = b2 + unit;
resetGlobalIdx();
Operand pz(IntPtr, unit);
Operand pxy(IntPtr, unit);
Operand pp(IntPtr, unit);
std::string name = "mcl_fp_montRed" + cybozu::itoa(N) + "L" + suf;
std::string name = "mcl_fp_montRed";
if (!isFullBit) {
name += "NF";
}
name += cybozu::itoa(N) + "L" + suf;
mcl_fp_montRedM[N] = Function(name, Void, pz, pxy, pp);
verifyAndSetPrivate(mcl_fp_montRedM[N]);
beginFunc(mcl_fp_montRedM[N]);
Operand rp = load(getelementptr(pp, -1));
Operand p = loadN(pp, N);
Operand xy = loadN(pxy, N * 2);
Operand t = zext(xy, b2 + unit);
const int bu = bit + unit;
const int bu2 = bit + unit * 2;
Operand t = loadN(pxy, N);
Operand H;
for (uint32_t i = 0; i < N; i++) {
Operand z = trunc(t, unit);
Operand q = mul(z, rp);
Operand q;
if (N == 1) {
q = mul(t, rp);
} else {
q = mul(trunc(t, unit), rp);
}
Operand pq = call(mulPvM[bit], pp, q);
pq = zext(pq, b2u - unit * i);
z = add(t, pq);
z = lshr(z, unit);
t = trunc(z, b2 - unit * i);
if (i > 0) {
H = zext(H, bu);
H = shl(H, bit);
pq = add(pq, H);
}
Operand next = load(getelementptr(pxy, N + i));
t = pack(next, t);
t = zext(t, bu2);
pq = zext(pq, bu2);
t = add(t, pq);
t = lshr(t, unit);
t = trunc(t, bu);
H = split(&t, t, bit);
}
Operand z;
if (isFullBit) {
p = zext(p, bu);
t = zext(t, bu);
Operand vc = sub(t, p);
Operand c = trunc(lshr(vc, bit), 1);
Operand z = select(c, t, vc);
z = select(c, t, vc);
z = trunc(z, bit);
} else {
Operand vc = sub(t, p);
Operand c = trunc(lshr(vc, bit - 1), 1);
z = select(c, t, vc);
}
storeN(z, pz);
ret(Void);
endFunc();
@ -941,7 +984,8 @@ struct Code : public mcl::Generator {
gen_mcl_fpDbl_sqrPre();
gen_mcl_fp_mont(true);
gen_mcl_fp_mont(false);
gen_mcl_fp_montRed();
gen_mcl_fp_montRed(true);
gen_mcl_fp_montRed(false);
}
void setBit(uint32_t bit)
{
@ -962,6 +1006,23 @@ struct Code : public mcl::Generator {
gen_mulUU();
#else
gen_once();
#if 1
int bitTbl[] = {
192,
224,
256,
384,
512
};
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(bitTbl); i++) {
uint32_t bit = bitTbl[i];
if (unit == 64 && bit == 224) continue;
setBit(bit);
gen_mul();
gen_all();
gen_addsub();
}
#else
uint32_t end = ((maxBitSize + unit - 1) / unit);
for (uint32_t n = 1; n <= end; n++) {
setBit(n * unit);
@ -969,6 +1030,7 @@ struct Code : public mcl::Generator {
gen_all();
gen_addsub();
}
#endif
if (unit == 64 && maxBitSize == 768) {
for (uint32_t i = maxBitSize + unit * 2; i <= maxBitSize * 2; i += unit * 2) {
setBit(i);

@ -510,7 +510,7 @@ const void4u DblSub<N, Tag>::f = DblSub<N, Tag>::func;
z[N] <- montRed(xy[N * 2], p[N])
REMARK : assume p[-1] = rp
*/
template<size_t N, class Tag = Gtag>
template<size_t N, bool isFullBit, class Tag = Gtag>
struct MontRed {
static inline void func(Unit *z, const Unit *xy, const Unit *p)
{
@ -546,8 +546,8 @@ struct MontRed {
static const void3u f;
};
template<size_t N, class Tag>
const void3u MontRed<N, Tag>::f = MontRed<N, Tag>::func;
template<size_t N, bool isFullBit, class Tag>
const void3u MontRed<N, isFullBit, Tag>::f = MontRed<N, isFullBit, Tag>::func;
/*
z[N] <- Montgomery(x[N], y[N], p[N])
@ -560,7 +560,7 @@ struct Mont {
#if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
Unit xy[N * 2];
MulPre<N, Tag>::f(xy, x, y);
MontRed<N, Tag>::f(z, xy, p);
MontRed<N, isFullBit, Tag>::f(z, xy, p);
#else
const Unit rp = p[-1];
if (isFullBit) {
@ -641,10 +641,10 @@ template<size_t N, bool isFullBit, class Tag = Gtag>
struct SqrMont {
static inline void func(Unit *y, const Unit *x, const Unit *p)
{
#if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
#if 0 // #if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
Unit xx[N * 2];
SqrPre<N, Tag>::f(xx, x);
MontRed<N, Tag>::f(y, xx, p);
MontRed<N, isFullBit, Tag>::f(y, xx, p);
#else
Mont<N, isFullBit, Tag>::f(y, x, x, p);
#endif
@ -702,9 +702,9 @@ struct Fp2MulNF {
MulPre<N, Tag>::f(d2, b, d);
SubPre<N * 2, Tag>::f(d0, d0, d1);
SubPre<N * 2, Tag>::f(d0, d0, d2);
MontRed<N, Tag>::f(z + N, d0, p);
MontRed<N, false, Tag>::f(z + N, d0, p);
DblSub<N, Tag>::f(d1, d1, d2, p);
MontRed<N, Tag>::f(z, d1, p);
MontRed<N, false, Tag>::f(z, d1, p);
}
static const void4u f;
};

@ -37,7 +37,8 @@ template<>const void4u Sub<n, true, tag>::f = &mcl_fp_sub ## n ## suf; \
template<>const void4u Sub<n, false, tag>::f = &mcl_fp_subNF ## n ## suf; \
template<>const void4u Mont<n, true, tag>::f = &mcl_fp_mont ## n ## suf; \
template<>const void4u Mont<n, false, tag>::f = &mcl_fp_montNF ## n ## suf; \
template<>const void3u MontRed<n, tag>::f = &mcl_fp_montRed ## n ## suf; \
template<>const void3u MontRed<n, true, tag>::f = &mcl_fp_montRed ## n ## suf; \
template<>const void3u MontRed<n, false, tag>::f = &mcl_fp_montRedNF ## n ## suf; \
template<>const void4u DblAdd<n, tag>::f = &mcl_fpDbl_add ## n ## suf; \
template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
@ -50,44 +51,29 @@ template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
MCL_DEF_LLVM_FUNC2(n, Ltag, L)
#endif
MCL_DEF_LLVM_FUNC(1)
MCL_DEF_LLVM_FUNC(2)
MCL_DEF_LLVM_FUNC(3)
MCL_DEF_LLVM_FUNC(4)
#if MCL_MAX_UNIT_SIZE >= 6
MCL_DEF_LLVM_FUNC(5)
#if CYBOZU_OS_BIT == 32
MCL_DEF_LLVM_FUNC(6)
#endif
#if MCL_MAX_UNIT_SIZE >= 8
MCL_DEF_LLVM_FUNC(7)
MCL_DEF_LLVM_FUNC(8)
#endif
#if MCL_MAX_UNIT_SIZE >= 9
MCL_DEF_LLVM_FUNC(9)
#endif
#if MCL_MAX_UNIT_SIZE >= 10
MCL_DEF_LLVM_FUNC(10)
#endif
#if MCL_MAX_UNIT_SIZE >= 12
MCL_DEF_LLVM_FUNC(11)
MCL_DEF_LLVM_FUNC(12)
#endif
#if MCL_MAX_UNIT_SIZE >= 14
MCL_DEF_LLVM_FUNC(13)
MCL_DEF_LLVM_FUNC(14)
#endif
#if MCL_MAX_UNIT_SIZE >= 16
MCL_DEF_LLVM_FUNC(15)
#if MCL_SIZEOF_UNIT == 4
MCL_DEF_LLVM_FUNC(16)
#else
/// QQQ : check speed
template<>const void3u MontRed<16, Ltag>::f = &mcl_fp_montRed16L;
template<>const void3u MontRed<16, LBMI2tag>::f = &mcl_fp_montRed16Lbmi2;
#endif
#else // 64
MCL_DEF_LLVM_FUNC(3)
MCL_DEF_LLVM_FUNC(4)
#if MCL_MAX_UNIT_SIZE >= 6
MCL_DEF_LLVM_FUNC(6)
#endif
#if MCL_MAX_UNIT_SIZE >= 17
MCL_DEF_LLVM_FUNC(17)
#if MCL_MAX_UNIT_SIZE >= 8
MCL_DEF_LLVM_FUNC(8)
#endif
#endif
} } // mcl::fp

@ -22,6 +22,7 @@ void mcl_fpDbl_sqrPre ## n ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
void mcl_fp_mont ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_montNF ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_montRed ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p); \
void mcl_fp_montRedNF ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p); \
void mcl_fpDbl_add ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fpDbl_sub ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p);
@ -37,38 +38,29 @@ void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, c
extern "C" {
MCL_FP_DEF_FUNC(1)
MCL_FP_DEF_FUNC(2)
MCL_FP_DEF_FUNC(3)
MCL_FP_DEF_FUNC(4)
#if MCL_MAX_UNIT_SIZE >= 6
MCL_FP_DEF_FUNC(5)
#if CYBOZU_OS_BIT == 32
MCL_FP_DEF_FUNC(6)
#endif
#if MCL_MAX_UNIT_SIZE >= 8
MCL_FP_DEF_FUNC(7)
MCL_FP_DEF_FUNC(8)
#endif
#if MCL_MAX_UNIT_SIZE >= 9
MCL_FP_DEF_FUNC(9)
#endif
#if MCL_MAX_UNIT_SIZE >= 10
MCL_FP_DEF_FUNC(10)
#endif
#if MCL_MAX_UNIT_SIZE >= 12
MCL_FP_DEF_FUNC(11)
MCL_FP_DEF_FUNC(12)
#endif
#if MCL_MAX_UNIT_SIZE >= 14
MCL_FP_DEF_FUNC(13)
MCL_FP_DEF_FUNC(14)
#endif
#if MCL_MAX_UNIT_SIZE >= 16
MCL_FP_DEF_FUNC(15)
MCL_FP_DEF_FUNC(16)
#endif
#if MCL_MAX_UNIT_SIZE >= 17
MCL_FP_DEF_FUNC(17)
#else // 64
MCL_FP_DEF_FUNC(3)
MCL_FP_DEF_FUNC(4)
#if MCL_MAX_UNIT_SIZE >= 6
MCL_FP_DEF_FUNC(6)
#endif
#if MCL_MAX_UNIT_SIZE >= 8
MCL_FP_DEF_FUNC(8)
#endif
#endif
MCL_FP_DEF_FUNC_SPECIAL(L)

@ -15,10 +15,14 @@ typedef mcl::FpT<> Fp;
const int MAX_N = 4;
const char *primeTable[] = {
#if 0
"0x7fffffffffffffffffffffffffffffff", // 127bit(not full)
"0xffffffffffffffffffffffffffffff61", // 128bit(full)
#endif
"0x7fffffffffffffffffffffffffffffffffffffffffffffed", // 191bit(not full)
"0xfffffffffffffffffffffffffffffffffffffffeffffee37", // 192bit(full)
"0x2523648240000001ba344d80000000086121000000000013a700000000000013", // 254bit(not full)
"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // 256bit(full)
};
void strToArray(uint64_t *p, size_t n, const char *pStr)

@ -426,6 +426,7 @@ void powTest()
CYBOZU_TEST_EQUAL(z, 1);
Fp::pow(z, x, Fp::getOp().mp);
CYBOZU_TEST_EQUAL(z, x);
#if 0
typedef mcl::FpT<tag2, 128> Fp_other;
Fp_other::init("1009");
x = 5;
@ -436,6 +437,7 @@ void powTest()
x = 5;
Fp::pow(x, x, n);
CYBOZU_TEST_EQUAL(x, 125);
#endif
}
void mulUnitTest()
@ -487,6 +489,7 @@ void powGmp()
struct TagAnother;
#if 0
void anotherFpTest(mcl::fp::Mode mode)
{
typedef mcl::FpT<TagAnother, 128> G;
@ -496,6 +499,7 @@ void anotherFpTest(mcl::fp::Mode mode)
a *= b;
CYBOZU_TEST_EQUAL(a, 1);
}
#endif
void setArrayTest1()
{
@ -508,6 +512,7 @@ void setArrayTest1()
CYBOZU_TEST_EQUAL(x, Fp("0x3400000012"));
}
#if 0
void setArrayTest2(mcl::fp::Mode mode)
{
Fp::init("0x10000000000001234567a5", mode);
@ -529,6 +534,7 @@ void setArrayTest2(mcl::fp::Mode mode)
uint32_t large[3] = { 0x234567a5, 0x00000001, 0x00100000};
CYBOZU_TEST_EXCEPTION(x.setArray(large, 3), cybozu::Exception);
}
#endif
void setArrayMaskTest1()
{
@ -541,6 +547,7 @@ void setArrayMaskTest1()
CYBOZU_TEST_EQUAL(x, Fp("0x3400000012"));
}
#if 0
void setArrayMaskTest2(mcl::fp::Mode mode)
{
Fp::init("0x10000000000001234567a5", mode);
@ -560,6 +567,7 @@ void setArrayMaskTest2(mcl::fp::Mode mode)
CYBOZU_TEST_EQUAL(x, Fp(tbl[i].expected));
}
}
#endif
void setArrayModTest()
{
@ -602,13 +610,13 @@ void setArrayModTest()
CYBOZU_TEST_AUTO(set64bit)
{
Fp::init("0x1000000000000000000f");
Fp::init("3138550867693340381917894711603833208051177722232017256453");
const struct {
const char *p;
int64_t i;
} tbl[] = {
{ "0x1234567812345678", int64_t(0x1234567812345678ull) },
{ "0xfffedcba987edcba997", -int64_t(0x1234567812345678ull) },
{ "-5", -5 },
};
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
Fp x(tbl[i].p);
@ -935,17 +943,7 @@ void sub(mcl::fp::Mode mode)
{
printf("mode=%s\n", mcl::fp::ModeToStr(mode));
const char *tbl[] = {
// N = 2
"0x0000000000000001000000000000000d",
"0x7fffffffffffffffffffffffffffffff",
"0x8000000000000000000000000000001d",
"0xffffffffffffffffffffffffffffff61",
// N = 3
"0x000000000000000100000000000000000000000000000033", // min prime
"0x00000000fffffffffffffffffffffffffffffffeffffac73",
"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
"0x000000010000000000000000000000000000000000000007",
"0x30000000000000000000000000000000000000000000002b",
"0x70000000000000000000000000000000000000000000001f",
"0x800000000000000000000000000000000000000000000005",
@ -1001,9 +999,9 @@ void sub(mcl::fp::Mode mode)
serializeTest();
modpTest();
}
anotherFpTest(mode);
setArrayTest2(mode);
setArrayMaskTest2(mode);
// anotherFpTest(mode);
// setArrayTest2(mode);
// setArrayMaskTest2(mode);
}
std::string g_mode;

@ -430,17 +430,7 @@ void test(const char *p, mcl::fp::Mode mode)
void testAll()
{
const char *tbl[] = {
// N = 2
"0x0000000000000001000000000000000d",
"0x7fffffffffffffffffffffffffffffff",
"0x8000000000000000000000000000001d",
"0xffffffffffffffffffffffffffffff61",
// N = 3
"0x000000000000000100000000000000000000000000000033", // min prime
"0x00000000fffffffffffffffffffffffffffffffeffffac73",
"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
"0x000000010000000000000000000000000000000000000007",
"0x30000000000000000000000000000000000000000000002b",
"0x70000000000000000000000000000000000000000000001f",
"0x800000000000000000000000000000000000000000000005",

@ -153,10 +153,13 @@ void testGLV1()
GLV1::mul(P2, P0, ss, true);
CYBOZU_TEST_EQUAL(P1, P2);
}
#ifndef NDEBUG
puts("skip testGLV1 in debug");
Fr s;
mapToG1(P0, 123);
CYBOZU_BENCH_C("Ec::mul", 100, P1 = P0; s.setRand(rg); G1::mulGeneric, P2, P1, s.getMpz());
CYBOZU_BENCH_C("Ec::glv", 100, P1 = P0; s.setRand(rg); GLV1::mul, P2, P1, s.getMpz());
#endif
}
/*
@ -188,10 +191,13 @@ void testGLV2()
GLV2::mul(Q2, Q0, n);
CYBOZU_TEST_EQUAL(Q1, Q2);
}
#ifndef NDEBUG
puts("skip testGLV2 in debug");
Fr s;
mapToG2(Q0, 123);
CYBOZU_BENCH_C("G2::mul", 1000, Q2 = Q0; s.setRand(rg); G2::mulGeneric, Q2, Q1, s.getMpz());
CYBOZU_BENCH_C("G2::glv", 1000, Q1 = Q0; s.setRand(rg); GLV2::mul, Q2, Q1, s.getMpz());
#endif
}
void testGT()

@ -0,0 +1,130 @@
/*
32bit raspi
N=6
mulPre 511.30nsec
sqrPre 598.33nsec
mod 769.64nsec
mont 1.283usec
N=8
mulPre 1.463usec
sqrPre 1.422usec
mod 1.972usec
mont 2.962usec
N=12
mulPre 2.229usec
sqrPre 2.056usec
mod 3.811usec
mont 6.802usec
N=16
mulPre 4.955usec
sqrPre 4.706usec
mod 6.817usec
mont 12.916usec
*/
#include <stdio.h>
#include <stdint.h>
#include <cybozu/inttype.hpp>
#include <cybozu/benchmark.hpp>
#include <cybozu/xorshift.hpp>
typedef size_t Unit;
// Primary templates for the benchmarked primitives, parameterized by the
// number of Units N. They are only declared here; the per-N specializations
// are produced by MCL_FP_DEF_FUNC_SUB below.
template<size_t N>
void mulPre(Unit*, const Unit*, const Unit*);
template<size_t N>
void sqrPre(Unit*, const Unit*);
template<size_t N>
void mod(Unit*, const Unit*, const Unit *);
template<size_t N>
void mont(Unit*, const Unit*, const Unit*, const Unit *);
/*
	MCL_FP_DEF_FUNC_SUB(n, suf)
	1. declares, with C linkage, the prototypes of the mcl_fp_* / mcl_fpDbl_* functions
	   whose names end in <n><suf>
	2. specializes the benchmark wrappers for N = n:
	   mulPre<n> -> mcl_fpDbl_mulPre<n><suf>
	   sqrPre<n> -> mcl_fpDbl_sqrPre<n><suf>
	   mod<n>    -> mcl_fp_montRedNF<n><suf>
	   mont<n>   -> mcl_fp_montNF<n><suf>
*/
#define MCL_FP_DEF_FUNC_SUB(n, suf) \
extern "C" { \
void mcl_fp_add ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_addNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_sub ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_subNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_shr1_ ## n ## suf(Unit*y, const Unit* x); \
Unit mcl_fp_addPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
Unit mcl_fp_subPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
void mcl_fp_mulUnitPre ## n ## suf(Unit* z, const Unit* x, Unit y); \
void mcl_fpDbl_mulPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
void mcl_fpDbl_sqrPre ## n ## suf(Unit* y, const Unit* x); \
void mcl_fp_mont ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_montNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_montRed ## n ## suf(Unit* z, const Unit* xy, const Unit* p); \
void mcl_fp_montRedNF ## n ## suf(Unit* z, const Unit* xy, const Unit* p); \
void mcl_fpDbl_add ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fpDbl_sub ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
} \
template<>void mulPre<n>(Unit *z, const Unit *x, const Unit *y) { mcl_fpDbl_mulPre ## n ## suf(z, x, y); } \
template<>void sqrPre<n>(Unit *z, const Unit *x) { mcl_fpDbl_sqrPre ## n ## suf(z, x); } \
template<>void mod<n>(Unit *z, const Unit *x, const Unit *p) { mcl_fp_montRedNF ## n ## suf(z, x, p); } \
template<>void mont<n>(Unit *z, const Unit *x, const Unit *y, const Unit *p) { mcl_fp_montNF ## n ## suf(z, x, y, p); }
// Instantiate the wrappers (suffix L) for every N that main() benchmarks:
// 4 and 5 only on 64-bit builds, 6 and 8 always, 12 and 16 only on 32-bit builds.
#if CYBOZU_OS_BIT == 64
MCL_FP_DEF_FUNC_SUB(4, L)
MCL_FP_DEF_FUNC_SUB(5, L)
#endif
MCL_FP_DEF_FUNC_SUB(6, L)
// N = 7 is disabled here and in main()
//MCL_FP_DEF_FUNC_SUB(7, L)
MCL_FP_DEF_FUNC_SUB(8, L)
#if CYBOZU_OS_BIT == 32
MCL_FP_DEF_FUNC_SUB(12, L)
MCL_FP_DEF_FUNC_SUB(16, L)
#endif
/*
	Fill x[0..n) with values taken from the generator rg.
	A 4-byte T is filled via rg.get32(); any other size via rg.get64().
*/
template<class RG, class T>
void setRand(T *x, size_t n, RG& rg)
{
	const bool is32 = sizeof(T) == 4;
	T *const last = x + n;
	for (T *cur = x; cur != last; ++cur) {
		if (is32) {
			*cur = rg.get32();
		} else {
			*cur = rg.get64();
		}
	}
}
/*
	Benchmark mulPre, sqrPre, mod and mont for operands of N Units.
	x, y : input operands (the first N Units of each are passed to the primitives)
	p    : modulus pointer handed to mod<N> and mont<N>
	xx receives the mulPre result and is then used as the input of mod<N>.
*/
template<size_t N>
void bench(Unit *x, Unit *y, const Unit *p)
{
	printf("N=%zu\n", N); // N is size_t, so use %zu rather than the signed %zd
	Unit xx[N * 2], yy[N * 2];
	// fewer iterations on 32-bit builds
#if CYBOZU_OS_BIT == 64
	const int C = 10000;
#else
	const int C = 1000;
#endif
	CYBOZU_BENCH_C("mulPre", C, mulPre<N>, xx, x, y);
	CYBOZU_BENCH_C("sqrPre", C, sqrPre<N>, yy, x);
	CYBOZU_BENCH_C("mod   ", C, mod<N>, yy, xx, p);
	CYBOZU_BENCH_C("mont  ", C, mont<N>, yy, x, y, p);
}
/*
	Fill x, y and p with pseudo-random Units and run bench<N> for each enabled N.
	p has maxN + 1 Units and p + 1 is passed as the modulus pointer, so p[-1]
	is a valid element from the callee's side.
*/
int main()
{
	printf("sizeof(Unit)=%zu\n", sizeof(Unit)); // sizeof yields size_t, so use %zu rather than the signed %zd
	const size_t maxN = 16;
	Unit x[maxN], y[maxN], p[maxN + 1];
	cybozu::XorShift rg;
	setRand(x, maxN, rg);
	setRand(y, maxN, rg);
	setRand(p, maxN + 1, rg);
#if CYBOZU_OS_BIT == 64
	bench<4>(x, y, p + 1);
	bench<5>(x, y, p + 1);
#endif
	bench<6>(x, y, p + 1);
//	bench<7>(x, y, p + 1);
	bench<8>(x, y, p + 1);
#if CYBOZU_OS_BIT == 32
	bench<12>(x, y, p + 1);
	bench<16>(x, y, p + 1);
#endif
}

@ -244,17 +244,7 @@ CYBOZU_TEST_AUTO(test)
Test test;
const char *tbl[] = {
#if 1
// N = 2
"0x0000000000000001000000000000000d",
"0x7fffffffffffffffffffffffffffffff",
"0x8000000000000000000000000000001d",
"0xffffffffffffffffffffffffffffff61",
// N = 3
"0x000000000000000100000000000000000000000000000033", // min prime
"0x00000000fffffffffffffffffffffffffffffffeffffac73",
"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
"0x000000010000000000000000000000000000000000000007",
"0x30000000000000000000000000000000000000000000002b",
"0x70000000000000000000000000000000000000000000001f",
"0x800000000000000000000000000000000000000000000005",

Loading…
Cancel
Save