Merge branch 'dev'

update-fork
MITSUNARI Shigeo 4 years ago
commit 2fc942b06e
  1. 2
      .github/workflows/main.yml
  2. 6
      Makefile
  3. 14
      ffi/java/Makefile
  4. 2
      ffi/java/com/herumi/mcl/CipherText.java
  5. 2
      ffi/java/com/herumi/mcl/Elgamal.java
  6. 2
      ffi/java/com/herumi/mcl/ElgamalJNI.java
  7. 2
      ffi/java/com/herumi/mcl/PrivateKey.java
  8. 2
      ffi/java/com/herumi/mcl/PublicKey.java
  9. 2
      ffi/java/com/herumi/mcl/SWIGTYPE_p_bool.java
  10. 12
      ffi/java/elgamal_wrap.cxx
  11. 24
      ffi/java/mcl_wrap.cxx
  12. 2
      include/mcl/gmp_util.hpp
  13. 1
      readme.md
  14. 18428
      src/asm/aarch64.s
  15. 109779
      src/asm/arm.s
  16. 19545
      src/asm/x86-64.bmi2.s
  17. 22911
      src/asm/x86-64.s
  18. 19031
      src/asm/x86-64mac.bmi2.s
  19. 22245
      src/asm/x86-64mac.s
  20. 94788
      src/asm/x86.bmi2.s
  21. 98329
      src/asm/x86.s
  22. 68204
      src/base32.ll
  23. 19538
      src/base64.ll
  24. 43
      src/fp.cpp
  25. 104
      src/gen.cpp
  26. 16
      src/low_func.hpp
  27. 44
      src/low_func_llvm.hpp
  28. 38
      src/proto.hpp
  29. 4
      test/fp_generator_test.cpp
  30. 28
      test/fp_test.cpp
  31. 10
      test/fp_tower_test.cpp
  32. 6
      test/glv_test.cpp
  33. 130
      test/llvm_test.cpp
  34. 10
      test/mont_fp_test.cpp

@ -16,6 +16,6 @@ jobs:
- run: make clean - run: make clean
- run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail - run: make test_ci DEBUG=1 -j4 CXX=clang++ || dmesg | tail
- run: make clean - run: make clean
- run: make test_go # - run: make test_go
- run: sudo apt install openjdk-8-jdk - run: sudo apt install openjdk-8-jdk
- run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include - run: make -C ffi/java test JAVA_INC=-I/usr/lib/jvm/java-8-openjdk-amd64/include

@ -358,6 +358,12 @@ bin/emu:
bin/pairing_c_min.exe: sample/pairing_c.c include/mcl/vint.hpp src/fp.cpp include/mcl/bn.hpp bin/pairing_c_min.exe: sample/pairing_c.c include/mcl/vint.hpp src/fp.cpp include/mcl/bn.hpp
$(CXX) -std=c++03 -O3 -g -fno-threadsafe-statics -fno-exceptions -fno-rtti -o $@ sample/pairing_c.c src/fp.cpp src/bn_c384_256.cpp -I./include -DXBYAK_NO_EXCEPTION -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384 -DMCL_VINT_64BIT_PORTABLE -DCYBOZU_DONT_USE_STRING -DCYBOZU_DONT_USE_EXCEPTION -DNDEBUG # -DMCL_DONT_USE_CSPRNG $(CXX) -std=c++03 -O3 -g -fno-threadsafe-statics -fno-exceptions -fno-rtti -o $@ sample/pairing_c.c src/fp.cpp src/bn_c384_256.cpp -I./include -DXBYAK_NO_EXCEPTION -DMCL_DONT_USE_OPENSSL -DMCL_USE_VINT -DMCL_SIZEOF_UNIT=8 -DMCL_VINT_FIXED_BUFFER -DMCL_MAX_BIT_SIZE=384 -DMCL_VINT_64BIT_PORTABLE -DCYBOZU_DONT_USE_STRING -DCYBOZU_DONT_USE_EXCEPTION -DNDEBUG # -DMCL_DONT_USE_CSPRNG
bin/llvm_test64.exe: test/llvm_test.cpp src/base64.ll
clang++$(LLVM_VER) -o $@ -Ofast -DNDEBUG -Wall -Wextra -I ./include test/llvm_test.cpp src/base64.ll
bin/llvm_test32.exe: test/llvm_test.cpp src/base32.ll
clang++$(LLVM_VER) -o $@ -Ofast -DNDEBUG -Wall -Wextra -I ./include test/llvm_test.cpp src/base32.ll -m32
make_tbl: make_tbl:
$(MAKE) ../bls/src/qcoeff-bn254.hpp $(MAKE) ../bls/src/qcoeff-bn254.hpp

@ -1,14 +1,18 @@
TOP_DIR=../.. TOP_DIR=../..
include $(TOP_DIR)/common.mk include $(TOP_DIR)/common.mk
ifeq ($(UNAME_S),Darwin) ifeq ($(UNAME_S),Darwin)
JAVA_INC?=-I/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/JavaVM.framework/Headers/ JAVA_INC_DIR?=/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/JavaVM.framework/Headers/
JAVA_INC?=-I$(JAVA_INC_DIR)
CFLAGS+=$(JAVA_INC)/darwin
else else
JAVA_INC?=-I/usr/lib/jvm/default-java/include JAVA_INC_DIR?=/usr/lib/jvm/default-java/include
JAVA_INC?=-I$(JAVA_INC_DIR)
#JAVA_INC=-I/usr/lib/jvm/java-7-openjdk-amd64/include #JAVA_INC=-I/usr/lib/jvm/java-7-openjdk-amd64/include
CFLAGS+=$(JAVA_INC)/linux
CFLAGS+=-z noexecstack CFLAGS+=-z noexecstack
LDFLAGS+=-lrt LDFLAGS+=-lrt
endif endif
CFLAGS+=$(JAVA_INC) $(JAVA_INC)/linux -I $(TOP_DIR)/include -I $(TOP_DIR)/../xbyak -I $(TOP_DIR)/../cybozulib/include -Wno-strict-aliasing CFLAGS+=$(JAVA_INC) -I $(TOP_DIR)/include -I -Wno-strict-aliasing
MCL_LIB=$(TOP_DIR)/lib/libmcl.a MCL_LIB=$(TOP_DIR)/lib/libmcl.a
PACKAGE_NAME=com.herumi.mcl PACKAGE_NAME=com.herumi.mcl
@ -28,7 +32,7 @@ mcl_wrap.cxx: mcl.i mcl_impl.hpp
swig -java -package $(PACKAGE_NAME) -outdir $(PACKAGE_DIR) -c++ -Wall mcl.i swig -java -package $(PACKAGE_NAME) -outdir $(PACKAGE_DIR) -c++ -Wall mcl.i
$(MCL_LIB): $(MCL_LIB):
make -C $(TOP_DIR) make -C $(TOP_DIR) lib/libmcl.a
$(ELGAMAL_LIB): elgamal_wrap.cxx $(MCL_LIB) $(ELGAMAL_LIB): elgamal_wrap.cxx $(MCL_LIB)
$(PRE)$(CXX) $< -o $@ $(CFLAGS) $(MCL_LIB) $(LDFLAGS) -shared $(PRE)$(CXX) $< -o $@ $(CFLAGS) $(MCL_LIB) $(LDFLAGS) -shared
@ -51,7 +55,7 @@ test_elgamal: ElgamalTest.class $(ELGAMAL_LIB)
$(JAVA_EXE) ElgamalTest -e NIST_P256 -h sha256 $(JAVA_EXE) ElgamalTest -e NIST_P256 -h sha256
$(JAVA_EXE) ElgamalTest -e secp256k1 -h sha256 $(JAVA_EXE) ElgamalTest -e secp256k1 -h sha256
$(JAVA_EXE) ElgamalTest -e NIST_P384 -h sha384 $(JAVA_EXE) ElgamalTest -e NIST_P384 -h sha384
$(JAVA_EXE) ElgamalTest -e NIST_P521 -h sha512 # $(JAVA_EXE) ElgamalTest -e NIST_P521 -h sha512
test_mcl: MclTest.class $(MCLJAVA_LIB) test_mcl: MclTest.class $(MCLJAVA_LIB)
$(JAVA_EXE) MclTest $(JAVA_EXE) MclTest

@ -1,6 +1,6 @@
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org). * This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0 * Version 4.0.2
* *
* Do not make changes to this file unless you know what you are doing--modify * Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead. * the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org). * This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0 * Version 4.0.2
* *
* Do not make changes to this file unless you know what you are doing--modify * Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead. * the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org). * This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0 * Version 4.0.2
* *
* Do not make changes to this file unless you know what you are doing--modify * Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead. * the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org). * This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0 * Version 4.0.2
* *
* Do not make changes to this file unless you know what you are doing--modify * Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead. * the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org). * This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0 * Version 4.0.2
* *
* Do not make changes to this file unless you know what you are doing--modify * Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead. * the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org). * This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0 * Version 4.0.2
* *
* Do not make changes to this file unless you know what you are doing--modify * Do not make changes to this file unless you know what you are doing--modify
* the SWIG interface file instead. * the SWIG interface file instead.

@ -1,6 +1,6 @@
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* This file was automatically generated by SWIG (http://www.swig.org). * This file was automatically generated by SWIG (http://www.swig.org).
* Version 4.0.0 * Version 4.0.2
* *
* This file is not intended to be easily readable and contains a number of * This file is not intended to be easily readable and contains a number of
* coding conventions designed to improve portability and efficiency. Do not make * coding conventions designed to improve portability and efficiency. Do not make
@ -182,15 +182,16 @@ template <typename T> T SwigValueInit() {
/* Support for throwing Java exceptions */ /* Support for throwing Java exceptions */
typedef enum { typedef enum {
SWIG_JavaOutOfMemoryError = 1, SWIG_JavaOutOfMemoryError = 1,
SWIG_JavaIOException, SWIG_JavaIOException,
SWIG_JavaRuntimeException, SWIG_JavaRuntimeException,
SWIG_JavaIndexOutOfBoundsException, SWIG_JavaIndexOutOfBoundsException,
SWIG_JavaArithmeticException, SWIG_JavaArithmeticException,
SWIG_JavaIllegalArgumentException, SWIG_JavaIllegalArgumentException,
SWIG_JavaNullPointerException, SWIG_JavaNullPointerException,
SWIG_JavaDirectorPureVirtual, SWIG_JavaDirectorPureVirtual,
SWIG_JavaUnknownError SWIG_JavaUnknownError,
SWIG_JavaIllegalStateException,
} SWIG_JavaExceptionCodes; } SWIG_JavaExceptionCodes;
typedef struct { typedef struct {
@ -211,6 +212,7 @@ static void SWIGUNUSED SWIG_JavaThrowException(JNIEnv *jenv, SWIG_JavaExceptionC
{ SWIG_JavaNullPointerException, "java/lang/NullPointerException" }, { SWIG_JavaNullPointerException, "java/lang/NullPointerException" },
{ SWIG_JavaDirectorPureVirtual, "java/lang/RuntimeException" }, { SWIG_JavaDirectorPureVirtual, "java/lang/RuntimeException" },
{ SWIG_JavaUnknownError, "java/lang/UnknownError" }, { SWIG_JavaUnknownError, "java/lang/UnknownError" },
{ SWIG_JavaIllegalStateException, "java/lang/IllegalStateException" },
{ (SWIG_JavaExceptionCodes)0, "java/lang/UnknownError" } { (SWIG_JavaExceptionCodes)0, "java/lang/UnknownError" }
}; };
const SWIG_JavaExceptions_t *except_ptr = java_exceptions; const SWIG_JavaExceptions_t *except_ptr = java_exceptions;

@ -1623,20 +1623,22 @@ SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isZero(JNIEnv *jenv,
return jresult; return jresult;
} }
SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isValidOrder(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) { SWIGEXPORT jboolean JNICALL Java_com_herumi_mcl_MclJNI_G1_1isValidOrder(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_) {
jboolean jresult = 0 ; jboolean jresult = 0 ;
G1 *arg1 = (G1 *) 0 ; G1 *arg1 = (G1 *) 0 ;
bool result; bool result;
(void)jenv; (void)jenv;
(void)jcls; (void)jcls;
(void)jarg1_; (void)jarg1_;
arg1 = *(G1 **)&jarg1; arg1 = *(G1 **)&jarg1;
result = (bool)((G1 const *)arg1)->isValidOrder(); result = (bool)((G1 const *)arg1)->isValidOrder();
jresult = (jboolean)result; jresult = (jboolean)result;
return jresult; return jresult;
} }
SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) { SWIGEXPORT void JNICALL Java_com_herumi_mcl_MclJNI_G1_1set(JNIEnv *jenv, jclass jcls, jlong jarg1, jobject jarg1_, jlong jarg2, jobject jarg2_, jlong jarg3, jobject jarg3_) {
G1 *arg1 = (G1 *) 0 ; G1 *arg1 = (G1 *) 0 ;
Fp *arg2 = 0 ; Fp *arg2 = 0 ;

@ -28,7 +28,7 @@
#define MCL_USE_VINT #define MCL_USE_VINT
#endif #endif
#ifndef MCL_MAX_BIT_SIZE #ifndef MCL_MAX_BIT_SIZE
#define MCL_MAX_BIT_SIZE 521 #define MCL_MAX_BIT_SIZE 512
#endif #endif
#ifdef MCL_USE_VINT #ifdef MCL_USE_VINT
#include <mcl/vint.hpp> #include <mcl/vint.hpp>

@ -10,6 +10,7 @@ mcl is a library for pairing-based cryptography,
which supports the optimal Ate pairing over BN curves and BLS12-381 curves. which supports the optimal Ate pairing over BN curves and BLS12-381 curves.
# News # News
- set default `MCL_MAX_BIT_SIZE=512`, which disables support for `NIST_P521`.
- improve performance - improve performance
- support M1 mac - support M1 mac
- dst for mapToG1 has changed to `BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_`. - dst for mapToG1 has changed to `BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_POP_`.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -309,11 +309,12 @@ void setOp2(Op& op)
if (op.isFullBit) { if (op.isFullBit) {
op.fp_mul = Mont<N, true, Tag>::f; op.fp_mul = Mont<N, true, Tag>::f;
op.fp_sqr = SqrMont<N, true, Tag>::f; op.fp_sqr = SqrMont<N, true, Tag>::f;
op.fpDbl_mod = MontRed<N, true, Tag>::f;
} else { } else {
op.fp_mul = Mont<N, false, Tag>::f; op.fp_mul = Mont<N, false, Tag>::f;
op.fp_sqr = SqrMont<N, false, Tag>::f; op.fp_sqr = SqrMont<N, false, Tag>::f;
op.fpDbl_mod = MontRed<N, false, Tag>::f;
} }
op.fpDbl_mod = MontRed<N, Tag>::f;
} else { } else {
op.fp_mul = Mul<N, Tag>::f; op.fp_mul = Mul<N, Tag>::f;
op.fp_sqr = Sqr<N, Tag>::f; op.fp_sqr = Sqr<N, Tag>::f;
@ -568,38 +569,16 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
} }
#endif #endif
switch (N) { switch (N) {
case 1: setOp<1>(*this, mode); break; case 192/CYBOZU_OS_BIT: setOp<192/CYBOZU_OS_BIT>(*this, mode); break;
case 2: setOp<2>(*this, mode); break; #if CYBOZU_OS_BIT == 32
case 3: setOp<3>(*this, mode); break; case 224/CYBOZU_OS_BIT: setOp<224/CYBOZU_OS_BIT>(*this, mode); break;
case 4: setOp<4>(*this, mode); break; // 256 if 64-bit
#if MCL_MAX_UNIT_SIZE >= 6
case 5: setOp<5>(*this, mode); break;
case 6: setOp<6>(*this, mode); break;
#endif #endif
#if MCL_MAX_UNIT_SIZE >= 8 case 256/CYBOZU_OS_BIT: setOp<256/CYBOZU_OS_BIT>(*this, mode); break;
case 7: setOp<7>(*this, mode); break; #if MCL_MAX_BIT_SIZE >= 384
case 8: setOp<8>(*this, mode); break; case 384/CYBOZU_OS_BIT: setOp<384/CYBOZU_OS_BIT>(*this, mode); break;
#endif #endif
#if MCL_MAX_UNIT_SIZE >= 9 #if MCL_MAX_BIT_SIZE >= 512
case 9: setOp<9>(*this, mode); break; // 521 if 64-bit case 512/CYBOZU_OS_BIT: setOp<512/CYBOZU_OS_BIT>(*this, mode); break;
#endif
#if MCL_MAX_UNIT_SIZE >= 10
case 10: setOp<10>(*this, mode); break;
#endif
#if MCL_MAX_UNIT_SIZE >= 12
case 11: setOp<11>(*this, mode); break;
case 12: setOp<12>(*this, mode); break; // 768 if 64-bit
#endif
#if MCL_MAX_UNIT_SIZE >= 14
case 13: setOp<13>(*this, mode); break;
case 14: setOp<14>(*this, mode); break;
#endif
#if MCL_MAX_UNIT_SIZE >= 16
case 15: setOp<15>(*this, mode); break;
case 16: setOp<16>(*this, mode); break; // 1024 if 64-bit
#endif
#if MCL_MAX_UNIT_SIZE >= 17
case 17: setOp<17>(*this, mode); break; // 521 if 32-bit
#endif #endif
default: default:
return false; return false;
@ -617,10 +596,12 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size
fp_sqr = &mcl_fp_sqr_NIST_P192L; fp_sqr = &mcl_fp_sqr_NIST_P192L;
fpDbl_mod = &mcl_fpDbl_mod_NIST_P192L; fpDbl_mod = &mcl_fpDbl_mod_NIST_P192L;
} }
#if MCL_MAX_BIT_SIZE >= 521
if (primeMode == PM_NIST_P521) { if (primeMode == PM_NIST_P521) {
fpDbl_mod = &mcl_fpDbl_mod_NIST_P521L; fpDbl_mod = &mcl_fpDbl_mod_NIST_P521L;
} }
#endif #endif
#endif
#if defined(MCL_USE_VINT) && MCL_SIZEOF_UNIT == 8 #if defined(MCL_USE_VINT) && MCL_SIZEOF_UNIT == 8
if (primeMode == PM_SECP256K1) { if (primeMode == PM_SECP256K1) {
fp_mul = &mcl::vint::mcl_fp_mul_SECP256K1; fp_mul = &mcl::vint::mcl_fp_mul_SECP256K1;

@ -669,7 +669,7 @@ struct Code : public mcl::Generator {
Operand z(Int, bu); Operand z(Int, bu);
Operand px(IntPtr, unit); Operand px(IntPtr, unit);
Operand y(Int, unit); Operand y(Int, unit);
std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit); std::string name = "mulPv" + cybozu::itoa(bit) + "x" + cybozu::itoa(unit) + suf;
mulPvM[bit] = Function(name, z, px, y); mulPvM[bit] = Function(name, z, px, y);
// workaround at https://github.com/herumi/mcl/pull/82 // workaround at https://github.com/herumi/mcl/pull/82
// mulPvM[bit].setPrivate(); // mulPvM[bit].setPrivate();
@ -715,11 +715,12 @@ struct Code : public mcl::Generator {
Operand z = mul(x, y); Operand z = mul(x, y);
storeN(z, pz); storeN(z, pz);
ret(Void); ret(Void);
} else if (N >= 8 && (N % 2) == 0) { } else if (N > 8 && (N % 2) == 0) {
/* /*
W = 1 << half W = 1 << half
(aW + b)(cW + d) = acW^2 + (ad + bc)W + bd (aW + b)(cW + d) = acW^2 + (ad + bc)W + bd
ad + bc = (a + b)(c + d) - ac - bd ad + bc = (a + b)(c + d) - ac - bd
@note Karatsuba is slower for N = 8
*/ */
const int H = N / 2; const int H = N / 2;
const int half = bit / 2; const int half = bit / 2;
@ -883,37 +884,79 @@ struct Code : public mcl::Generator {
ret(Void); ret(Void);
endFunc(); endFunc();
} }
void gen_mcl_fp_montRed() // return [H:L]
/*
	Concatenate two operands into one wider operand.
	The result is (H.bit + L.bit) bits wide: H is zero-extended to that
	width and shifted left by L.bit, L is zero-extended to the same width,
	and the two are OR-ed together, so H ends up above L.
	The helper calls are issued in the order zext(H), shl, zext(L), _or.
*/
Operand pack(Operand H, Operand L)
{
	const int total = H.bit + L.bit;
	// upper part : widen H, then move it above the L.bit low bits
	Operand hi = zext(H, total);
	hi = shl(hi, L.bit);
	// lower part : widen L so that both sides have the same width
	Operand lo = zext(L, total);
	return _or(hi, lo);
}
/*
	Split x into [ret:L] where L takes the low sizeL bits.
	@param L [out] receives trunc(x, sizeL)
	@param x operand to split
	@param sizeL bit size of the low part
	@return x shifted right by sizeL and truncated by sizeL bits
	@note L may point at the same operand as x, so the high part is
	derived from x before *L is written
*/
Operand split(Operand *L, const Operand& x, int sizeL)
{
	const Operand shifted = lshr(x, sizeL);
	Operand high = trunc(shifted, shifted.bit - sizeL);
	*L = trunc(x, sizeL);
	return high;
}
void gen_mcl_fp_montRed(bool isFullBit = true)
{ {
const int bu = bit + unit;
const int b2 = bit * 2;
const int b2u = b2 + unit;
resetGlobalIdx(); resetGlobalIdx();
Operand pz(IntPtr, unit); Operand pz(IntPtr, unit);
Operand pxy(IntPtr, unit); Operand pxy(IntPtr, unit);
Operand pp(IntPtr, unit); Operand pp(IntPtr, unit);
std::string name = "mcl_fp_montRed" + cybozu::itoa(N) + "L" + suf; std::string name = "mcl_fp_montRed";
if (!isFullBit) {
name += "NF";
}
name += cybozu::itoa(N) + "L" + suf;
mcl_fp_montRedM[N] = Function(name, Void, pz, pxy, pp); mcl_fp_montRedM[N] = Function(name, Void, pz, pxy, pp);
verifyAndSetPrivate(mcl_fp_montRedM[N]); verifyAndSetPrivate(mcl_fp_montRedM[N]);
beginFunc(mcl_fp_montRedM[N]); beginFunc(mcl_fp_montRedM[N]);
Operand rp = load(getelementptr(pp, -1)); Operand rp = load(getelementptr(pp, -1));
Operand p = loadN(pp, N); Operand p = loadN(pp, N);
Operand xy = loadN(pxy, N * 2); const int bu = bit + unit;
Operand t = zext(xy, b2 + unit); const int bu2 = bit + unit * 2;
Operand t = loadN(pxy, N);
Operand H;
for (uint32_t i = 0; i < N; i++) { for (uint32_t i = 0; i < N; i++) {
Operand z = trunc(t, unit); Operand q;
Operand q = mul(z, rp); if (N == 1) {
q = mul(t, rp);
} else {
q = mul(trunc(t, unit), rp);
}
Operand pq = call(mulPvM[bit], pp, q); Operand pq = call(mulPvM[bit], pp, q);
pq = zext(pq, b2u - unit * i); if (i > 0) {
z = add(t, pq); H = zext(H, bu);
z = lshr(z, unit); H = shl(H, bit);
t = trunc(z, b2 - unit * i); pq = add(pq, H);
}
Operand next = load(getelementptr(pxy, N + i));
t = pack(next, t);
t = zext(t, bu2);
pq = zext(pq, bu2);
t = add(t, pq);
t = lshr(t, unit);
t = trunc(t, bu);
H = split(&t, t, bit);
}
Operand z;
if (isFullBit) {
p = zext(p, bu);
t = zext(t, bu);
Operand vc = sub(t, p);
Operand c = trunc(lshr(vc, bit), 1);
z = select(c, t, vc);
z = trunc(z, bit);
} else {
Operand vc = sub(t, p);
Operand c = trunc(lshr(vc, bit - 1), 1);
z = select(c, t, vc);
} }
p = zext(p, bu);
Operand vc = sub(t, p);
Operand c = trunc(lshr(vc, bit), 1);
Operand z = select(c, t, vc);
z = trunc(z, bit);
storeN(z, pz); storeN(z, pz);
ret(Void); ret(Void);
endFunc(); endFunc();
@ -941,7 +984,8 @@ struct Code : public mcl::Generator {
gen_mcl_fpDbl_sqrPre(); gen_mcl_fpDbl_sqrPre();
gen_mcl_fp_mont(true); gen_mcl_fp_mont(true);
gen_mcl_fp_mont(false); gen_mcl_fp_mont(false);
gen_mcl_fp_montRed(); gen_mcl_fp_montRed(true);
gen_mcl_fp_montRed(false);
} }
void setBit(uint32_t bit) void setBit(uint32_t bit)
{ {
@ -962,6 +1006,23 @@ struct Code : public mcl::Generator {
gen_mulUU(); gen_mulUU();
#else #else
gen_once(); gen_once();
#if 1
int bitTbl[] = {
192,
224,
256,
384,
512
};
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(bitTbl); i++) {
uint32_t bit = bitTbl[i];
if (unit == 64 && bit == 224) continue;
setBit(bit);
gen_mul();
gen_all();
gen_addsub();
}
#else
uint32_t end = ((maxBitSize + unit - 1) / unit); uint32_t end = ((maxBitSize + unit - 1) / unit);
for (uint32_t n = 1; n <= end; n++) { for (uint32_t n = 1; n <= end; n++) {
setBit(n * unit); setBit(n * unit);
@ -969,6 +1030,7 @@ struct Code : public mcl::Generator {
gen_all(); gen_all();
gen_addsub(); gen_addsub();
} }
#endif
if (unit == 64 && maxBitSize == 768) { if (unit == 64 && maxBitSize == 768) {
for (uint32_t i = maxBitSize + unit * 2; i <= maxBitSize * 2; i += unit * 2) { for (uint32_t i = maxBitSize + unit * 2; i <= maxBitSize * 2; i += unit * 2) {
setBit(i); setBit(i);

@ -510,7 +510,7 @@ const void4u DblSub<N, Tag>::f = DblSub<N, Tag>::func;
z[N] <- montRed(xy[N * 2], p[N]) z[N] <- montRed(xy[N * 2], p[N])
REMARK : assume p[-1] = rp REMARK : assume p[-1] = rp
*/ */
template<size_t N, class Tag = Gtag> template<size_t N, bool isFullBit, class Tag = Gtag>
struct MontRed { struct MontRed {
static inline void func(Unit *z, const Unit *xy, const Unit *p) static inline void func(Unit *z, const Unit *xy, const Unit *p)
{ {
@ -546,8 +546,8 @@ struct MontRed {
static const void3u f; static const void3u f;
}; };
template<size_t N, class Tag> template<size_t N, bool isFullBit, class Tag>
const void3u MontRed<N, Tag>::f = MontRed<N, Tag>::func; const void3u MontRed<N, isFullBit, Tag>::f = MontRed<N, isFullBit, Tag>::func;
/* /*
z[N] <- Montgomery(x[N], y[N], p[N]) z[N] <- Montgomery(x[N], y[N], p[N])
@ -560,7 +560,7 @@ struct Mont {
#if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed #if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
Unit xy[N * 2]; Unit xy[N * 2];
MulPre<N, Tag>::f(xy, x, y); MulPre<N, Tag>::f(xy, x, y);
MontRed<N, Tag>::f(z, xy, p); MontRed<N, isFullBit, Tag>::f(z, xy, p);
#else #else
const Unit rp = p[-1]; const Unit rp = p[-1];
if (isFullBit) { if (isFullBit) {
@ -641,10 +641,10 @@ template<size_t N, bool isFullBit, class Tag = Gtag>
struct SqrMont { struct SqrMont {
static inline void func(Unit *y, const Unit *x, const Unit *p) static inline void func(Unit *y, const Unit *x, const Unit *p)
{ {
#if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed #if 0 // #if MCL_MAX_BIT_SIZE == 1024 || MCL_SIZEOF_UNIT == 4 // check speed
Unit xx[N * 2]; Unit xx[N * 2];
SqrPre<N, Tag>::f(xx, x); SqrPre<N, Tag>::f(xx, x);
MontRed<N, Tag>::f(y, xx, p); MontRed<N, isFullBit, Tag>::f(y, xx, p);
#else #else
Mont<N, isFullBit, Tag>::f(y, x, x, p); Mont<N, isFullBit, Tag>::f(y, x, x, p);
#endif #endif
@ -702,9 +702,9 @@ struct Fp2MulNF {
MulPre<N, Tag>::f(d2, b, d); MulPre<N, Tag>::f(d2, b, d);
SubPre<N * 2, Tag>::f(d0, d0, d1); SubPre<N * 2, Tag>::f(d0, d0, d1);
SubPre<N * 2, Tag>::f(d0, d0, d2); SubPre<N * 2, Tag>::f(d0, d0, d2);
MontRed<N, Tag>::f(z + N, d0, p); MontRed<N, false, Tag>::f(z + N, d0, p);
DblSub<N, Tag>::f(d1, d1, d2, p); DblSub<N, Tag>::f(d1, d1, d2, p);
MontRed<N, Tag>::f(z, d1, p); MontRed<N, false, Tag>::f(z, d1, p);
} }
static const void4u f; static const void4u f;
}; };

@ -37,7 +37,8 @@ template<>const void4u Sub<n, true, tag>::f = &mcl_fp_sub ## n ## suf; \
template<>const void4u Sub<n, false, tag>::f = &mcl_fp_subNF ## n ## suf; \ template<>const void4u Sub<n, false, tag>::f = &mcl_fp_subNF ## n ## suf; \
template<>const void4u Mont<n, true, tag>::f = &mcl_fp_mont ## n ## suf; \ template<>const void4u Mont<n, true, tag>::f = &mcl_fp_mont ## n ## suf; \
template<>const void4u Mont<n, false, tag>::f = &mcl_fp_montNF ## n ## suf; \ template<>const void4u Mont<n, false, tag>::f = &mcl_fp_montNF ## n ## suf; \
template<>const void3u MontRed<n, tag>::f = &mcl_fp_montRed ## n ## suf; \ template<>const void3u MontRed<n, true, tag>::f = &mcl_fp_montRed ## n ## suf; \
template<>const void3u MontRed<n, false, tag>::f = &mcl_fp_montRedNF ## n ## suf; \
template<>const void4u DblAdd<n, tag>::f = &mcl_fpDbl_add ## n ## suf; \ template<>const void4u DblAdd<n, tag>::f = &mcl_fpDbl_add ## n ## suf; \
template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \ template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
@ -50,44 +51,29 @@ template<>const void4u DblSub<n, tag>::f = &mcl_fpDbl_sub ## n ## suf; \
MCL_DEF_LLVM_FUNC2(n, Ltag, L) MCL_DEF_LLVM_FUNC2(n, Ltag, L)
#endif #endif
MCL_DEF_LLVM_FUNC(1) #if CYBOZU_OS_BIT == 32
MCL_DEF_LLVM_FUNC(2)
MCL_DEF_LLVM_FUNC(3)
MCL_DEF_LLVM_FUNC(4)
#if MCL_MAX_UNIT_SIZE >= 6
MCL_DEF_LLVM_FUNC(5)
MCL_DEF_LLVM_FUNC(6) MCL_DEF_LLVM_FUNC(6)
#endif
#if MCL_MAX_UNIT_SIZE >= 8
MCL_DEF_LLVM_FUNC(7) MCL_DEF_LLVM_FUNC(7)
MCL_DEF_LLVM_FUNC(8) MCL_DEF_LLVM_FUNC(8)
#endif
#if MCL_MAX_UNIT_SIZE >= 9
MCL_DEF_LLVM_FUNC(9)
#endif
#if MCL_MAX_UNIT_SIZE >= 10
MCL_DEF_LLVM_FUNC(10)
#endif
#if MCL_MAX_UNIT_SIZE >= 12 #if MCL_MAX_UNIT_SIZE >= 12
MCL_DEF_LLVM_FUNC(11)
MCL_DEF_LLVM_FUNC(12) MCL_DEF_LLVM_FUNC(12)
#endif #endif
#if MCL_MAX_UNIT_SIZE >= 14
MCL_DEF_LLVM_FUNC(13)
MCL_DEF_LLVM_FUNC(14)
#endif
#if MCL_MAX_UNIT_SIZE >= 16 #if MCL_MAX_UNIT_SIZE >= 16
MCL_DEF_LLVM_FUNC(15)
#if MCL_SIZEOF_UNIT == 4
MCL_DEF_LLVM_FUNC(16) MCL_DEF_LLVM_FUNC(16)
#else
/// QQQ : check speed
template<>const void3u MontRed<16, Ltag>::f = &mcl_fp_montRed16L;
template<>const void3u MontRed<16, LBMI2tag>::f = &mcl_fp_montRed16Lbmi2;
#endif #endif
#else // 64
MCL_DEF_LLVM_FUNC(3)
MCL_DEF_LLVM_FUNC(4)
#if MCL_MAX_UNIT_SIZE >= 6
MCL_DEF_LLVM_FUNC(6)
#endif #endif
#if MCL_MAX_UNIT_SIZE >= 17 #if MCL_MAX_UNIT_SIZE >= 8
MCL_DEF_LLVM_FUNC(17) MCL_DEF_LLVM_FUNC(8)
#endif
#endif #endif
} } // mcl::fp } } // mcl::fp

@ -22,6 +22,7 @@ void mcl_fpDbl_sqrPre ## n ## suf(mcl::fp::Unit* y, const mcl::fp::Unit* x); \
void mcl_fp_mont ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ void mcl_fp_mont ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_montNF ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ void mcl_fp_montNF ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fp_montRed ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p); \ void mcl_fp_montRed ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p); \
void mcl_fp_montRedNF ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, const mcl::fp::Unit* p); \
void mcl_fpDbl_add ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \ void mcl_fpDbl_add ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); \
void mcl_fpDbl_sub ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p); void mcl_fpDbl_sub ## n ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* x, const mcl::fp::Unit* y, const mcl::fp::Unit* p);
@ -37,38 +38,29 @@ void mcl_fpDbl_mod_NIST_P521 ## suf(mcl::fp::Unit* z, const mcl::fp::Unit* xy, c
extern "C" { extern "C" {
MCL_FP_DEF_FUNC(1) #if CYBOZU_OS_BIT == 32
MCL_FP_DEF_FUNC(2)
MCL_FP_DEF_FUNC(3)
MCL_FP_DEF_FUNC(4)
#if MCL_MAX_UNIT_SIZE >= 6
MCL_FP_DEF_FUNC(5)
MCL_FP_DEF_FUNC(6) MCL_FP_DEF_FUNC(6)
#endif
#if MCL_MAX_UNIT_SIZE >= 8
MCL_FP_DEF_FUNC(7) MCL_FP_DEF_FUNC(7)
MCL_FP_DEF_FUNC(8) MCL_FP_DEF_FUNC(8)
#endif
#if MCL_MAX_UNIT_SIZE >= 9
MCL_FP_DEF_FUNC(9)
#endif
#if MCL_MAX_UNIT_SIZE >= 10
MCL_FP_DEF_FUNC(10)
#endif
#if MCL_MAX_UNIT_SIZE >= 12 #if MCL_MAX_UNIT_SIZE >= 12
MCL_FP_DEF_FUNC(11)
MCL_FP_DEF_FUNC(12) MCL_FP_DEF_FUNC(12)
#endif #endif
#if MCL_MAX_UNIT_SIZE >= 14
MCL_FP_DEF_FUNC(13)
MCL_FP_DEF_FUNC(14)
#endif
#if MCL_MAX_UNIT_SIZE >= 16 #if MCL_MAX_UNIT_SIZE >= 16
MCL_FP_DEF_FUNC(15)
MCL_FP_DEF_FUNC(16) MCL_FP_DEF_FUNC(16)
#endif #endif
#if MCL_MAX_UNIT_SIZE >= 17
MCL_FP_DEF_FUNC(17) #else // 64
MCL_FP_DEF_FUNC(3)
MCL_FP_DEF_FUNC(4)
#if MCL_MAX_UNIT_SIZE >= 6
MCL_FP_DEF_FUNC(6)
#endif
#if MCL_MAX_UNIT_SIZE >= 8
MCL_FP_DEF_FUNC(8)
#endif
#endif #endif
MCL_FP_DEF_FUNC_SPECIAL(L) MCL_FP_DEF_FUNC_SPECIAL(L)

@ -15,10 +15,14 @@ typedef mcl::FpT<> Fp;
const int MAX_N = 4; const int MAX_N = 4;
const char *primeTable[] = { const char *primeTable[] = {
#if 0
"0x7fffffffffffffffffffffffffffffff", // 127bit(not full) "0x7fffffffffffffffffffffffffffffff", // 127bit(not full)
"0xffffffffffffffffffffffffffffff61", // 128bit(full) "0xffffffffffffffffffffffffffffff61", // 128bit(full)
#endif
"0x7fffffffffffffffffffffffffffffffffffffffffffffed", // 191bit(not full)
"0xfffffffffffffffffffffffffffffffffffffffeffffee37", // 192bit(full) "0xfffffffffffffffffffffffffffffffffffffffeffffee37", // 192bit(full)
"0x2523648240000001ba344d80000000086121000000000013a700000000000013", // 254bit(not full) "0x2523648240000001ba344d80000000086121000000000013a700000000000013", // 254bit(not full)
"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // 256bit(full)
}; };
void strToArray(uint64_t *p, size_t n, const char *pStr) void strToArray(uint64_t *p, size_t n, const char *pStr)

@ -426,6 +426,7 @@ void powTest()
CYBOZU_TEST_EQUAL(z, 1); CYBOZU_TEST_EQUAL(z, 1);
Fp::pow(z, x, Fp::getOp().mp); Fp::pow(z, x, Fp::getOp().mp);
CYBOZU_TEST_EQUAL(z, x); CYBOZU_TEST_EQUAL(z, x);
#if 0
typedef mcl::FpT<tag2, 128> Fp_other; typedef mcl::FpT<tag2, 128> Fp_other;
Fp_other::init("1009"); Fp_other::init("1009");
x = 5; x = 5;
@ -436,6 +437,7 @@ void powTest()
x = 5; x = 5;
Fp::pow(x, x, n); Fp::pow(x, x, n);
CYBOZU_TEST_EQUAL(x, 125); CYBOZU_TEST_EQUAL(x, 125);
#endif
} }
void mulUnitTest() void mulUnitTest()
@ -487,6 +489,7 @@ void powGmp()
struct TagAnother; struct TagAnother;
#if 0
void anotherFpTest(mcl::fp::Mode mode) void anotherFpTest(mcl::fp::Mode mode)
{ {
typedef mcl::FpT<TagAnother, 128> G; typedef mcl::FpT<TagAnother, 128> G;
@ -496,6 +499,7 @@ void anotherFpTest(mcl::fp::Mode mode)
a *= b; a *= b;
CYBOZU_TEST_EQUAL(a, 1); CYBOZU_TEST_EQUAL(a, 1);
} }
#endif
void setArrayTest1() void setArrayTest1()
{ {
@ -508,6 +512,7 @@ void setArrayTest1()
CYBOZU_TEST_EQUAL(x, Fp("0x3400000012")); CYBOZU_TEST_EQUAL(x, Fp("0x3400000012"));
} }
#if 0
void setArrayTest2(mcl::fp::Mode mode) void setArrayTest2(mcl::fp::Mode mode)
{ {
Fp::init("0x10000000000001234567a5", mode); Fp::init("0x10000000000001234567a5", mode);
@ -529,6 +534,7 @@ void setArrayTest2(mcl::fp::Mode mode)
uint32_t large[3] = { 0x234567a5, 0x00000001, 0x00100000}; uint32_t large[3] = { 0x234567a5, 0x00000001, 0x00100000};
CYBOZU_TEST_EXCEPTION(x.setArray(large, 3), cybozu::Exception); CYBOZU_TEST_EXCEPTION(x.setArray(large, 3), cybozu::Exception);
} }
#endif
void setArrayMaskTest1() void setArrayMaskTest1()
{ {
@ -541,6 +547,7 @@ void setArrayMaskTest1()
CYBOZU_TEST_EQUAL(x, Fp("0x3400000012")); CYBOZU_TEST_EQUAL(x, Fp("0x3400000012"));
} }
#if 0
void setArrayMaskTest2(mcl::fp::Mode mode) void setArrayMaskTest2(mcl::fp::Mode mode)
{ {
Fp::init("0x10000000000001234567a5", mode); Fp::init("0x10000000000001234567a5", mode);
@ -560,6 +567,7 @@ void setArrayMaskTest2(mcl::fp::Mode mode)
CYBOZU_TEST_EQUAL(x, Fp(tbl[i].expected)); CYBOZU_TEST_EQUAL(x, Fp(tbl[i].expected));
} }
} }
#endif
void setArrayModTest() void setArrayModTest()
{ {
@ -602,13 +610,13 @@ void setArrayModTest()
CYBOZU_TEST_AUTO(set64bit) CYBOZU_TEST_AUTO(set64bit)
{ {
Fp::init("0x1000000000000000000f"); Fp::init("3138550867693340381917894711603833208051177722232017256453");
const struct { const struct {
const char *p; const char *p;
int64_t i; int64_t i;
} tbl[] = { } tbl[] = {
{ "0x1234567812345678", int64_t(0x1234567812345678ull) }, { "0x1234567812345678", int64_t(0x1234567812345678ull) },
{ "0xfffedcba987edcba997", -int64_t(0x1234567812345678ull) }, { "-5", -5 },
}; };
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
Fp x(tbl[i].p); Fp x(tbl[i].p);
@ -935,17 +943,7 @@ void sub(mcl::fp::Mode mode)
{ {
printf("mode=%s\n", mcl::fp::ModeToStr(mode)); printf("mode=%s\n", mcl::fp::ModeToStr(mode));
const char *tbl[] = { const char *tbl[] = {
// N = 2
"0x0000000000000001000000000000000d",
"0x7fffffffffffffffffffffffffffffff",
"0x8000000000000000000000000000001d",
"0xffffffffffffffffffffffffffffff61",
// N = 3 // N = 3
"0x000000000000000100000000000000000000000000000033", // min prime
"0x00000000fffffffffffffffffffffffffffffffeffffac73",
"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
"0x000000010000000000000000000000000000000000000007",
"0x30000000000000000000000000000000000000000000002b", "0x30000000000000000000000000000000000000000000002b",
"0x70000000000000000000000000000000000000000000001f", "0x70000000000000000000000000000000000000000000001f",
"0x800000000000000000000000000000000000000000000005", "0x800000000000000000000000000000000000000000000005",
@ -1001,9 +999,9 @@ void sub(mcl::fp::Mode mode)
serializeTest(); serializeTest();
modpTest(); modpTest();
} }
anotherFpTest(mode); // anotherFpTest(mode);
setArrayTest2(mode); // setArrayTest2(mode);
setArrayMaskTest2(mode); // setArrayMaskTest2(mode);
} }
std::string g_mode; std::string g_mode;

@ -430,17 +430,7 @@ void test(const char *p, mcl::fp::Mode mode)
void testAll() void testAll()
{ {
const char *tbl[] = { const char *tbl[] = {
// N = 2
"0x0000000000000001000000000000000d",
"0x7fffffffffffffffffffffffffffffff",
"0x8000000000000000000000000000001d",
"0xffffffffffffffffffffffffffffff61",
// N = 3 // N = 3
"0x000000000000000100000000000000000000000000000033", // min prime
"0x00000000fffffffffffffffffffffffffffffffeffffac73",
"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
"0x000000010000000000000000000000000000000000000007",
"0x30000000000000000000000000000000000000000000002b", "0x30000000000000000000000000000000000000000000002b",
"0x70000000000000000000000000000000000000000000001f", "0x70000000000000000000000000000000000000000000001f",
"0x800000000000000000000000000000000000000000000005", "0x800000000000000000000000000000000000000000000005",

@ -153,10 +153,13 @@ void testGLV1()
GLV1::mul(P2, P0, ss, true); GLV1::mul(P2, P0, ss, true);
CYBOZU_TEST_EQUAL(P1, P2); CYBOZU_TEST_EQUAL(P1, P2);
} }
#ifndef NDEBUG
puts("skip testGLV1 in debug");
Fr s; Fr s;
mapToG1(P0, 123); mapToG1(P0, 123);
CYBOZU_BENCH_C("Ec::mul", 100, P1 = P0; s.setRand(rg); G1::mulGeneric, P2, P1, s.getMpz()); CYBOZU_BENCH_C("Ec::mul", 100, P1 = P0; s.setRand(rg); G1::mulGeneric, P2, P1, s.getMpz());
CYBOZU_BENCH_C("Ec::glv", 100, P1 = P0; s.setRand(rg); GLV1::mul, P2, P1, s.getMpz()); CYBOZU_BENCH_C("Ec::glv", 100, P1 = P0; s.setRand(rg); GLV1::mul, P2, P1, s.getMpz());
#endif
} }
/* /*
@ -188,10 +191,13 @@ void testGLV2()
GLV2::mul(Q2, Q0, n); GLV2::mul(Q2, Q0, n);
CYBOZU_TEST_EQUAL(Q1, Q2); CYBOZU_TEST_EQUAL(Q1, Q2);
} }
#ifndef NDEBUG
puts("skip testGLV2 in debug");
Fr s; Fr s;
mapToG2(Q0, 123); mapToG2(Q0, 123);
CYBOZU_BENCH_C("G2::mul", 1000, Q2 = Q0; s.setRand(rg); G2::mulGeneric, Q2, Q1, s.getMpz()); CYBOZU_BENCH_C("G2::mul", 1000, Q2 = Q0; s.setRand(rg); G2::mulGeneric, Q2, Q1, s.getMpz());
CYBOZU_BENCH_C("G2::glv", 1000, Q1 = Q0; s.setRand(rg); GLV2::mul, Q2, Q1, s.getMpz()); CYBOZU_BENCH_C("G2::glv", 1000, Q1 = Q0; s.setRand(rg); GLV2::mul, Q2, Q1, s.getMpz());
#endif
} }
void testGT() void testGT()

@ -0,0 +1,130 @@
/*
32bit raspi
N=6
mulPre 511.30nsec
sqrPre 598.33nsec
mod 769.64nsec
mont 1.283usec
N=8
mulPre 1.463usec
sqrPre 1.422usec
mod 1.972usec
mont 2.962usec
N=12
mulPre 2.229usec
sqrPre 2.056usec
mod 3.811usec
mont 6.802usec
N=16
mulPre 4.955usec
sqrPre 4.706usec
mod 6.817usec
mont 12.916usec
*/
#include <stdio.h>
#include <stdint.h>
#include <cybozu/inttype.hpp>
#include <cybozu/benchmark.hpp>
#include <cybozu/xorshift.hpp>
// Limb type passed to every routine declared below.
typedef size_t Unit;
// Primary templates, declared only. MCL_FP_DEF_FUNC_SUB(n, suf) provides the
// explicit specialization of each one for a given n.
// mulPre<N>(z, x, y): forwards to mcl_fpDbl_mulPre<n><suf>.
template<size_t N>
void mulPre(Unit*, const Unit*, const Unit*);
// sqrPre<N>(z, x): forwards to mcl_fpDbl_sqrPre<n><suf>.
template<size_t N>
void sqrPre(Unit*, const Unit*);
// mod<N>(z, x, p): forwards to mcl_fp_montRedNF<n><suf>.
template<size_t N>
void mod(Unit*, const Unit*, const Unit *);
// mont<N>(z, x, y, p): forwards to mcl_fp_montNF<n><suf>.
template<size_t N>
void mont(Unit*, const Unit*, const Unit*, const Unit *);
/*
	MCL_FP_DEF_FUNC_SUB(n, suf)
	1. declares, with C linkage, the externally defined routines whose names
	   are built by pasting n and suf onto a fixed prefix
	   (e.g. mcl_fp_add ## n ## suf, mcl_fpDbl_mulPre ## n ## suf).
	2. defines the explicit specializations mulPre<n>, sqrPre<n>, mod<n> and
	   mont<n>, which forward to mcl_fpDbl_mulPre, mcl_fpDbl_sqrPre,
	   mcl_fp_montRedNF and mcl_fp_montNF respectively.
	Only those four routines are called in this file; the remaining
	declarations are not referenced here.
*/
#define MCL_FP_DEF_FUNC_SUB(n, suf) \
extern "C" { \
void mcl_fp_add ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_addNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_sub ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_subNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_shr1_ ## n ## suf(Unit*y, const Unit* x); \
Unit mcl_fp_addPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
Unit mcl_fp_subPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
void mcl_fp_mulUnitPre ## n ## suf(Unit* z, const Unit* x, Unit y); \
void mcl_fpDbl_mulPre ## n ## suf(Unit* z, const Unit* x, const Unit* y); \
void mcl_fpDbl_sqrPre ## n ## suf(Unit* y, const Unit* x); \
void mcl_fp_mont ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_montNF ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fp_montRed ## n ## suf(Unit* z, const Unit* xy, const Unit* p); \
void mcl_fp_montRedNF ## n ## suf(Unit* z, const Unit* xy, const Unit* p); \
void mcl_fpDbl_add ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
void mcl_fpDbl_sub ## n ## suf(Unit* z, const Unit* x, const Unit* y, const Unit* p); \
} \
template<>void mulPre<n>(Unit *z, const Unit *x, const Unit *y) { mcl_fpDbl_mulPre ## n ## suf(z, x, y); } \
template<>void sqrPre<n>(Unit *z, const Unit *x) { mcl_fpDbl_sqrPre ## n ## suf(z, x); } \
template<>void mod<n>(Unit *z, const Unit *x, const Unit *p) { mcl_fp_montRedNF ## n ## suf(z, x, p); } \
template<>void mont<n>(Unit *z, const Unit *x, const Unit *y, const Unit *p) { mcl_fp_montNF ## n ## suf(z, x, y, p); }
// Generate the wrappers for exactly the sizes that main() benchmarks:
// n = 4, 5 only when CYBOZU_OS_BIT == 64, n = 6, 8 always,
// n = 12, 16 only when CYBOZU_OS_BIT == 32. n = 7 is disabled.
#if CYBOZU_OS_BIT == 64
MCL_FP_DEF_FUNC_SUB(4, L)
MCL_FP_DEF_FUNC_SUB(5, L)
#endif
MCL_FP_DEF_FUNC_SUB(6, L)
//MCL_FP_DEF_FUNC_SUB(7, L)
MCL_FP_DEF_FUNC_SUB(8, L)
#if CYBOZU_OS_BIT == 32
MCL_FP_DEF_FUNC_SUB(12, L)
MCL_FP_DEF_FUNC_SUB(16, L)
#endif
/*
	Fill x[0..n) with values taken from rg.
	One rg.get32() call per element when T is 4 bytes wide,
	otherwise one rg.get64() call per element.
*/
template<class RG, class T>
void setRand(T *x, size_t n, RG& rg)
{
	const bool use32 = (sizeof(T) == 4);
	T *const last = x + n;
	for (T *cur = x; cur != last; ++cur) {
		if (use32) {
			*cur = rg.get32();
		} else {
			*cur = rg.get64();
		}
	}
}
/*
	Time the N-limb specializations of mulPre, sqrPre, mod and mont.
	x, y : operands forwarded to mulPre<N>, sqrPre<N> and mont<N>
	p    : forwarded unchanged as the last argument of mod<N> and mont<N>
	Results are written to local scratch buffers and discarded; only the
	timings reported by CYBOZU_BENCH_C are of interest.
*/
template<size_t N>
void bench(Unit *x, Unit *y, const Unit *p)
{
	// N is a size_t, so the matching conversion is %zu (%zd is the signed form).
	printf("N=%zu\n", N);
	// Scratch buffers of 2 * N elements. xx is written by mulPre<N> first and
	// then read as the input of mod<N>.
	Unit xx[N * 2], yy[N * 2];
	// Count handed to CYBOZU_BENCH_C; smaller on non-64-bit builds.
#if CYBOZU_OS_BIT == 64
	const int C = 10000;
#else
	const int C = 1000;
#endif
	CYBOZU_BENCH_C("mulPre", C, mulPre<N>, xx, x, y);
	CYBOZU_BENCH_C("sqrPre", C, sqrPre<N>, yy, x);
	CYBOZU_BENCH_C("mod ", C, mod<N>, yy, xx, p);
	CYBOZU_BENCH_C("mont ", C, mont<N>, yy, x, y, p);
}
/*
	Benchmark driver: fills x, y and p with values from cybozu::XorShift and
	runs bench<N> for every N enabled for the current CYBOZU_OS_BIT.
*/
int main()
{
	// sizeof yields a size_t, so print it with %zu rather than the signed %zd.
	printf("sizeof(Unit)=%zu\n", sizeof(Unit));
	const size_t maxN = 16;
	// p has one extra leading element and every call passes p + 1.
	// NOTE(review): presumably the low-level routines read a value stored
	// just before the modulus (p[-1]) - confirm against their definition.
	Unit x[maxN], y[maxN], p[maxN + 1];
	cybozu::XorShift rg;
	setRand(x, maxN, rg);
	setRand(y, maxN, rg);
	setRand(p, maxN + 1, rg);
#if CYBOZU_OS_BIT == 64
	bench<4>(x, y, p + 1);
	bench<5>(x, y, p + 1);
#endif
	bench<6>(x, y, p + 1);
//	bench<7>(x, y, p + 1);
	bench<8>(x, y, p + 1);
#if CYBOZU_OS_BIT == 32
	bench<12>(x, y, p + 1);
	bench<16>(x, y, p + 1);
#endif
}

@ -244,17 +244,7 @@ CYBOZU_TEST_AUTO(test)
Test test; Test test;
const char *tbl[] = { const char *tbl[] = {
#if 1 #if 1
// N = 2
"0x0000000000000001000000000000000d",
"0x7fffffffffffffffffffffffffffffff",
"0x8000000000000000000000000000001d",
"0xffffffffffffffffffffffffffffff61",
// N = 3 // N = 3
"0x000000000000000100000000000000000000000000000033", // min prime
"0x00000000fffffffffffffffffffffffffffffffeffffac73",
"0x0000000100000000000000000001b8fa16dfab9aca16b6b3",
"0x000000010000000000000000000000000000000000000007",
"0x30000000000000000000000000000000000000000000002b", "0x30000000000000000000000000000000000000000000002b",
"0x70000000000000000000000000000000000000000000001f", "0x70000000000000000000000000000000000000000000001f",
"0x800000000000000000000000000000000000000000000005", "0x800000000000000000000000000000000000000000000005",

Loading…
Cancel
Save