From 14fd1d125d21f47c2539a0e820e2a417e0c715f8 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 5 May 2015 10:52:44 +0900 Subject: [PATCH] from mie --- COPYRIGHT | 47 + Makefile | 20 + common.mk | 105 ++ common.props | 26 + debug.props | 14 + include/mcl/ec.hpp | 585 ++++++++++ include/mcl/ecparam.hpp | 161 +++ include/mcl/fp.hpp | 446 ++++++++ include/mcl/fp_base.hpp | 527 +++++++++ include/mcl/fp_generator.hpp | 1675 +++++++++++++++++++++++++++++ include/mcl/fp_util.hpp | 294 +++++ include/mcl/gmp_util.hpp | 378 +++++++ include/mcl/mont_fp.hpp | 463 ++++++++ include/mcl/operator.hpp | 118 ++ include/mcl/power.hpp | 181 ++++ include/mcl/tagmultigr.hpp | 39 + mcl.sln | 25 + release.props | 12 + sample/Makefile | 23 + sample/ecdh_smpl.cpp | 69 ++ sample/random_smpl.cpp | 29 + src/Makefile | 42 + src/all.txt | 7 + src/gen.py | 187 ++++ src/long.txt | 54 + src/mul.txt | 81 ++ src/once.txt | 74 ++ src/short.txt | 46 + test/Makefile | 42 + test/base_test.cpp | 392 +++++++ test/ec_test.cpp | 397 +++++++ test/fp_generator_test.cpp | 222 ++++ test/fp_test.cpp | 465 ++++++++ test/fp_util_test.cpp | 191 ++++ test/mk32.sh | 1 + test/mont_fp_test.cpp | 809 ++++++++++++++ test/proj/ec_test/ec_test.vcxproj | 88 ++ test/proj/fp_test/fp_test.vcxproj | 91 ++ test/sq_test.cpp | 21 + 39 files changed, 8447 insertions(+) create mode 100644 COPYRIGHT create mode 100644 Makefile create mode 100644 common.mk create mode 100644 common.props create mode 100644 debug.props create mode 100644 include/mcl/ec.hpp create mode 100644 include/mcl/ecparam.hpp create mode 100644 include/mcl/fp.hpp create mode 100644 include/mcl/fp_base.hpp create mode 100644 include/mcl/fp_generator.hpp create mode 100644 include/mcl/fp_util.hpp create mode 100644 include/mcl/gmp_util.hpp create mode 100644 include/mcl/mont_fp.hpp create mode 100644 include/mcl/operator.hpp create mode 100644 include/mcl/power.hpp create mode 100644 include/mcl/tagmultigr.hpp create mode 100644 mcl.sln create mode 100644 release.props create mode 100644 sample/Makefile create mode 100644 sample/ecdh_smpl.cpp create mode 100644 sample/random_smpl.cpp create mode 100644 src/Makefile create mode 100644 src/all.txt create mode 100644 src/gen.py create mode 100644 src/long.txt create mode 100644 src/mul.txt create mode 100644 src/once.txt create mode 100644 src/short.txt create mode 100644 test/Makefile create mode 100644 test/base_test.cpp create mode 100644 test/ec_test.cpp create mode 100644 test/fp_generator_test.cpp create mode 100644 test/fp_test.cpp create mode 100644 test/fp_util_test.cpp create mode 100644 test/mk32.sh create mode 100644 test/mont_fp_test.cpp create mode 100644 test/proj/ec_test/ec_test.vcxproj create mode 100644 test/proj/fp_test/fp_test.vcxproj create mode 100644 test/sq_test.cpp diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..bfe54da --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,47 @@ + +Copyright (c) 2015 MITSUNARI Shigeo +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +Neither the name of the copyright owner nor the names of its contributors may +be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た +す場合に限り、再頒布および使用が許可されます。 + +ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項 +を含めること。 +バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作 +権表示、本条件一覧、および下記免責条項を含めること。 +書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進 +に、著作権者の名前またはコントリビューターの名前を使用してはならない。 +本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ +れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性 +に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。 +著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを +問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で +あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、 +本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の +喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接 +損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、 +一切責任を負わないものとします。 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..383af2b --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +include common.mk + +all: + $(MKDIR) bin + $(MAKE) -C test + $(MAKE) -C sample + +test: + $(MAKE) -C test test + +sample: + $(MAKE) -C sample test + +clean: +# $(MAKE) -C src clean + $(MAKE) -C test clean + $(MAKE) -C sample clean + +.PHONY: sample + diff --git a/common.mk b/common.mk new file mode 100644 index 0000000..b861db1 --- /dev/null +++ b/common.mk @@ -0,0 +1,105 @@ +GCC_VER=$(shell $(PRE)$(CC) -dumpversion) +UNAME_S=$(shell uname -s) +ifeq ($(UNAME_S),Linux) + OS=Linux +endif +ifneq ($(UNAME_S),Darwin) + LDFLAGS += -lrt +endif +CP = cp -f +AR = ar r +MKDIR=mkdir -p +RM=rm -fr +CFLAGS_OPT+=-fomit-frame-pointer -DNDEBUG +ifeq ($(CXX),clang++) + CFLAGS_OPT+=-O3 +else + ifeq ($(shell expr $(GCC_VER) \> 4.6.0),1) + CFLAGS_OPT+=-Ofast + else + CFLAGS_OPT+=-O3 + endif +endif +CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith +CFLAGS+= -g -D_FILE_OFFSET_BITS=64 +CFLAGS+=$(CFLAGS_WARN) +BIT?=64 +ifeq ($(BIT),32) + CPU?=x86 +else + ifeq ($(BIT),64) + CPU?=x64 + endif +endif +ifeq ($(BIT),0) + BIT_OPT= +else + BIT_OPT=-m$(BIT) +endif +ifeq ($(MARCH),) +ifeq ($(shell expr $(GCC_VER) \> 4.2.1),1) + CFLAGS+=-march=native +endif +else + CFLAGS+=$(MARCH) +endif + +DEBUG=1 +ifeq ($(RELEASE),1) + DEBUG=0 +endif + +ifeq ($(DEBUG),0) + CFLAGS+=$(CFLAGS_OPT) + OBJDIR=release + OBJSUF= +else + ifeq ($(OS),Linux) + LDFLAGS+=-rdynamic + endif + OBJDIR=debug + OBJSUF=d +endif + +#################################################### + +LDFLAGS += -lpthread -m$(BIT) -lgmp -lgmpxx + +#################################################### + +TOPDIR:=$(realpath $(dir $(lastword $(MAKEFILE_LIST))))/ +EXTDIR:=$(TOPDIR)../cybozulib_ext/ +CFLAGS+= -I$(TOPDIR)include -I$(TOPDIR)../cybozulib/include/ -I$(TOPDIR)../xbyak/ $(BIT_OPT) $(INC_DIR) +LDFLAGS+= -L$(TOPDIR)lib $(BIT_OPT) -Wl,-rpath,'$$ORIGIN/../lib' $(LD_DIR) + +MKDEP = sh -ec '$(PRE)$(CC) -MM $(CFLAGS) $< | sed "s@\($*\)\.o[ :]*@$(OBJDIR)/\1.o $@ : @g" > $@; [ -s $@ ] || rm -f $@; touch $@' + +CLEAN=$(RM) $(TARGET) $(OBJDIR) + +define UNIT_TEST +sh -ec 'for i in $(TARGET); do $$i|grep "ctest:name"; done' > result.txt +grep -v "ng=0, exception=0" result.txt || echo "all unit tests are ok" +endef + +define SAMPLE_TEST +sh -ec 'for i in $(TARGET); do $$i; done' +endef + +.SUFFIXES: .cpp .d .exe + +$(OBJDIR)/%.o: %.cpp + $(PRE)$(CXX) -c $< -o $@ $(CFLAGS) + +$(OBJDIR)/%.d: %.cpp $(OBJDIR) + @$(MKDEP) + +$(TOPDIR)bin/%$(OBJSUF).exe: $(OBJDIR)/%.o $(LIBS) + $(PRE)$(CXX) $< -o $@ $(LIBS) $(LDFLAGS) + +OBJS=$(addprefix $(OBJDIR)/,$(SRC:.cpp=.o)) + +DEPEND_FILE=$(addprefix $(OBJDIR)/, $(SRC:.cpp=.d)) +TEST_FILE=$(addprefix $(TOPDIR)bin/, $(SRC:.cpp=$(OBJSUF).exe)) + +.PHONY: test + diff --git a/common.props b/common.props new file mode 100644 index 0000000..8ec3f67 --- /dev/null +++ b/common.props @@ -0,0 +1,26 @@ + + + + + + $(SolutionDir)bin\ + + + + $(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include + + + + + Level4 + MultiThreaded + + + _MBCS;%(PreprocessorDefinitions);NOMINMAX + + + $(SolutionDir)../cybozulib_ext/mpir/lib + + + + \ No newline at end of file diff --git a/debug.props b/debug.props new file mode 100644 index 0000000..d261c8d --- /dev/null +++ b/debug.props @@ -0,0 +1,14 @@ + + + + + + $(ProjectName)d + + + + MultiThreadedDebugDLL + + + + \ No newline at end of file diff --git a/include/mcl/ec.hpp b/include/mcl/ec.hpp new file mode 100644 index 0000000..8b70b70 --- /dev/null +++ b/include/mcl/ec.hpp @@ -0,0 +1,585 @@ +#pragma once +/** + @file + @brief elliptic curve + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include +#include +#include +#include +#include +#include + +namespace mcl { + +#define MCL_EC_USE_AFFINE 0 +#define MCL_EC_USE_PROJ 1 +#define MCL_EC_USE_JACOBI 2 + +//#define MCL_EC_COORD MCL_EC_USE_JACOBI +//#define MCL_EC_COORD MCL_EC_USE_PROJ +#ifndef MCL_EC_COORD + #define MCL_EC_COORD MCL_EC_USE_PROJ +#endif +/* + elliptic curve + y^2 = x^3 + ax + b (affine) + y^2 = x^3 + az^4 + bz^6 (Jacobi) x = X/Z^2, y = Y/Z^3 +*/ +template +class EcT : public ope::addsub, + ope::comparable, + ope::hasNegative > > > { + enum { + zero, + minus3, + generic + }; +public: + typedef _Fp Fp; + typedef typename Fp::BlockType BlockType; +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + Fp x, y; + bool inf_; +#else + mutable Fp x, y, z; +#endif + static Fp a_; + static Fp b_; + static int specialA_; + static bool compressedExpression_; +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + EcT() : inf_(true) {} +#else + EcT() { z.clear(); } +#endif + EcT(const Fp& _x, const Fp& _y) + { + set(_x, _y); + } + void normalize() const + { +#if MCL_EC_COORD == MCL_EC_USE_JACOBI + if (isZero() || z == 1) return; + Fp rz, rz2; + Fp::inv(rz, z); + rz2 = rz * rz; + x *= rz2; + y *= rz2 * rz; + z = 1; +#elif MCL_EC_COORD == MCL_EC_USE_PROJ + if (isZero() || z == 1) return; + Fp rz; + Fp::inv(rz, z); + x *= rz; + y *= rz; + z = 1; +#endif + } + + static inline void setParam(const std::string& astr, const std::string& bstr) + { + a_.fromStr(astr); + b_.fromStr(bstr); + if (a_.isZero()) { + specialA_ = zero; + } else if (a_ == -3) { + specialA_ = minus3; + } else { + specialA_ = generic; + } + } + static inline bool isValid(const Fp& _x, const Fp& _y) + { + return _y * _y == (_x * _x + a_) * _x + b_; + } + void set(const Fp& _x, const Fp& _y, bool verify = true) + { + if (verify && !isValid(_x, _y)) throw cybozu::Exception("ec:EcT:set") << _x << _y; + x = _x; y = _y; +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + inf_ = false; +#else + z = 1; +#endif + } + void clear() + { +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + inf_ = true; +#else + z = 0; +#endif + x.clear(); + y.clear(); + } + + static inline void dbl(EcT& R, const EcT& P, bool verifyInf = true) + { + if (verifyInf) { + if (P.isZero()) { + R.clear(); return; + } + } +#if MCL_EC_COORD == MCL_EC_USE_JACOBI + Fp S, M, t, y2; + Fp::square(y2, P.y); + Fp::mul(S, P.x, y2); + S += S; + S += S; + Fp::square(M, P.x); + switch (specialA_) { + case zero: + Fp::add(t, M, M); + M += t; + break; + case minus3: + Fp::square(t, P.z); + Fp::square(t, t); + M -= t; + Fp::add(t, M, M); + M += t; + break; + case generic: + default: + Fp::square(t, P.z); + Fp::square(t, t); + t *= a_; + t += M; + M += M; + M += t; + break; + } + Fp::square(R.x, M); + R.x -= S; + R.x -= S; + Fp::mul(R.z, P.y, P.z); + R.z += R.z; + Fp::square(y2, y2); + y2 += y2; + y2 += y2; + y2 += y2; + Fp::sub(R.y, S, R.x); + R.y *= M; + R.y -= y2; +#elif MCL_EC_COORD == MCL_EC_USE_PROJ + Fp w, t, h; + switch (specialA_) { + case zero: + Fp::square(w, P.x); + Fp::add(t, w, w); + w += t; + break; + case minus3: + Fp::square(w, P.x); + Fp::square(t, P.z); + w -= t; + Fp::add(t, w, w); + w += t; + break; + case generic: + default: + Fp::square(w, P.z); + w *= a_; + Fp::square(t, P.x); + w += t; + w += t; + w += t; // w = a z^2 + 3x^2 + break; + } + Fp::mul(R.z, P.y, P.z); // s = yz + Fp::mul(t, R.z, P.x); + t *= P.y; // xys + t += t; + t += t; // 4(xys) ; 4B + Fp::square(h, w); + h -= t; + h -= t; // w^2 - 8B + Fp::mul(R.x, h, R.z); + t -= h; // h is free + t *= w; + Fp::square(w, P.y); + R.x += R.x; + R.z += R.z; + Fp::square(h, R.z); + w *= h; + R.z *= h; + Fp::sub(R.y, t, w); + R.y -= w; +#else + Fp t, s; + Fp::square(t, P.x); + Fp::add(s, t, t); + t += s; + t += a_; + Fp::add(s, P.y, P.y); + t /= s; + Fp::square(s, t); + s -= P.x; + Fp x3; + Fp::sub(x3, s, P.x); + Fp::sub(s, P.x, x3); + s *= t; + Fp::sub(R.y, s, P.y); + R.x = x3; + R.inf_ = false; +#endif + } + static inline void add(EcT& R, const EcT& P, const EcT& Q) + { + if (P.isZero()) { R = Q; return; } + if (Q.isZero()) { R = P; return; } +#if MCL_EC_COORD == MCL_EC_USE_JACOBI + Fp r, U1, S1, H, H3; + Fp::square(r, P.z); + Fp::square(S1, Q.z); + Fp::mul(U1, P.x, S1); + Fp::mul(H, Q.x, r); + H -= U1; + r *= P.z; + S1 *= Q.z; + S1 *= P.y; + Fp::mul(r, Q.y, r); + r -= S1; + if (H.isZero()) { + if (r.isZero()) { + dbl(R, P, false); + } else { + R.clear(); + } + return; + } + Fp::mul(R.z, P.z, Q.z); + R.z *= H; + Fp::square(H3, H); // H^2 + Fp::square(R.y, r); // r^2 + U1 *= H3; // U1 H^2 + H3 *= H; // H^3 + R.y -= U1; + R.y -= U1; + Fp::sub(R.x, R.y, H3); + U1 -= R.x; + U1 *= r; + H3 *= S1; + Fp::sub(R.y, U1, H3); +#elif MCL_EC_COORD == MCL_EC_USE_PROJ + Fp r, PyQz, v, A, vv; + Fp::mul(r, P.x, Q.z); + Fp::mul(PyQz, P.y, Q.z); + Fp::mul(A, Q.y, P.z); + Fp::mul(v, Q.x, P.z); + v -= r; + if (v.isZero()) { + Fp::add(vv, A, PyQz); + if (vv.isZero()) { + R.clear(); + } else { + dbl(R, P, false); + } + return; + } + Fp::sub(R.y, A, PyQz); + Fp::square(A, R.y); + Fp::square(vv, v); + r *= vv; + vv *= v; + Fp::mul(R.z, P.z, Q.z); + A *= R.z; + R.z *= vv; + A -= vv; + vv *= PyQz; + A -= r; + A -= r; + Fp::mul(R.x, v, A); + r -= A; + R.y *= r; + R.y -= vv; +#else + Fp t; + Fp::neg(t, Q.y); + if (P.y == t) { R.clear(); return; } + Fp::sub(t, Q.x, P.x); + if (t.isZero()) { + dbl(R, P, false); + return; + } + Fp s; + Fp::sub(s, Q.y, P.y); + Fp::div(t, s, t); + R.inf_ = false; + Fp x3; + Fp::square(x3, t); + x3 -= P.x; + x3 -= Q.x; + Fp::sub(s, P.x, x3); + s *= t; + Fp::sub(R.y, s, P.y); + R.x = x3; +#endif + } + static inline void sub(EcT& R, const EcT& P, const EcT& Q) + { +#if 0 + if (P.inf_) { neg(R, Q); return; } + if (Q.inf_) { R = P; return; } + if (P.y == Q.y) { R.clear(); return; } + Fp t; + Fp::sub(t, Q.x, P.x); + if (t.isZero()) { + dbl(R, P, false); + return; + } + Fp s; + Fp::add(s, Q.y, P.y); + Fp::neg(s, s); + Fp::div(t, s, t); + R.inf_ = false; + Fp x3; + Fp::mul(x3, t, t); + x3 -= P.x; + x3 -= Q.x; + Fp::sub(s, P.x, x3); + s *= t; + Fp::sub(R.y, s, P.y); + R.x = x3; +#else + EcT nQ; + neg(nQ, Q); + add(R, P, nQ); +#endif + } + static inline void neg(EcT& R, const EcT& P) + { + if (P.isZero()) { + R.clear(); + return; + } +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + R.inf_ = false; + R.x = P.x; + Fp::neg(R.y, P.y); +#else + R.x = P.x; + Fp::neg(R.y, P.y); + R.z = P.z; +#endif + } + template + static inline void power(EcT& z, const EcT& x, const N& y) + { + power_impl::power(z, x, y); + } + /* + 0 <= P for any P + (Px, Py) <= (P'x, P'y) iff Px < P'x or Px == P'x and Py <= P'y + */ + static inline int compare(const EcT& P, const EcT& Q) + { + P.normalize(); + Q.normalize(); + if (P.isZero()) { + if (Q.isZero()) return 0; + return -1; + } + if (Q.isZero()) return 1; + int c = _Fp::compare(P.x, Q.x); + if (c > 0) return 1; + if (c < 0) return -1; + return _Fp::compare(P.y, Q.y); + } + bool isZero() const + { +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + return inf_; +#else + return z.isZero(); +#endif + } + friend inline std::ostream& operator<<(std::ostream& os, const EcT& self) + { + if (self.isZero()) { + return os << '0'; + } else { + self.normalize(); + os << self.x.toStr(16) << '_'; + if (compressedExpression_) { + return os << Fp::isYodd(self.y); + } else { + return os << self.y.toStr(16); + } + } + } + friend inline std::istream& operator>>(std::istream& is, EcT& self) + { + std::string str; + is >> str; + if (str == "0") { + self.clear(); + } else { +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + self.inf_ = false; +#else + self.z = 1; +#endif + size_t pos = str.find('_'); + if (pos == std::string::npos) throw cybozu::Exception("EcT:operator>>:bad format") << str; + str[pos] = '\0'; + self.x.fromStr(&str[0], 16); + if (compressedExpression_) { + const char c = str[pos + 1]; + if ((c == '0' || c == '1') && str.size() == pos + 2) { + getYfromX(self.y, self.x, c == '1'); + } else { + str[pos] = '_'; + throw cybozu::Exception("EcT:operator>>:bad y") << str; + } + } else { + self.y.fromStr(&str[pos + 1], 16); + } + } + return is; + } + static inline void setCompressedExpression(bool compressedExpression) + { + compressedExpression_ = compressedExpression; + } + /* + append to bv(not clear bv) + */ + void appendToBitVec(cybozu::BitVector& bv) const + { +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + #error "not implemented" +#else + normalize(); + const size_t bitLen = _Fp::getModBitLen(); + /* + elem |x|y|z| + size n n 1 if not compressed + size n 1 1 if compressed + */ + const size_t maxBitLen = compressedExpression_ ? (bitLen + 1 + 1) : (bitLen * 2 + 1); + if (isZero()) { + bv.resize(bv.size() + maxBitLen); + return; + } + x.appendToBitVec(bv); + if (compressedExpression_) { + bv.append(Fp::isYodd(y), 1); + } else { + y.appendToBitVec(bv); + } + bv.append(1, 1); // z = 1 +#endif + } + void fromBitVec(const cybozu::BitVector& bv) + { +#if MCL_EC_COORD == MCL_EC_USE_AFFINE + #error "not implemented" +#else + const size_t bitLen = _Fp::getModBitLen(); + const size_t maxBitLen = compressedExpression_ ? (bitLen + 1 + 1) : (bitLen * 2 + 1); + if (bv.size() != maxBitLen) { + throw cybozu::Exception("EcT:fromBitVec:bad size") << bv.size() << maxBitLen; + } + if (!bv.get(maxBitLen - 1)) { // if z = 0 + clear(); + return; + } + cybozu::BitVector t; + bv.extract(t, 0, bitLen); + x.fromBitVec(t); + if (compressedExpression_) { + bool odd = bv.get(bitLen); // y + getYfromX(y, x, odd); + } else { + bv.extract(t, bitLen, bitLen); + y.fromBitVec(t); + } + z = 1; +#endif + } + static inline size_t getBitVecSize() + { + const size_t bitLen = _Fp::getModBitLen(); + if (compressedExpression_) { + return bitLen + 2; + } else { + return bitLen * 2 + 1;; + } + } + static inline void getYfromX(Fp& y, const Fp& x, bool isYodd) + { + Fp t; + Fp::square(t, x); + t += a_; + t *= x; + t += b_; + Fp::squareRoot(y, t); + if (Fp::isYodd(y) ^ isYodd) { + Fp::neg(y, y); + } + } +}; + +template +struct TagMultiGr > { + static void square(EcT& z, const EcT& x) + { + EcT::dbl(z, x); + } + static void mul(EcT& z, const EcT& x, const EcT& y) + { + EcT::add(z, x, y); + } + static void inv(EcT& z, const EcT& x) + { + EcT::neg(z, x); + } + static void div(EcT& z, const EcT& x, const EcT& y) + { + EcT::sub(z, x, y); + } + static void init(EcT& x) + { + x.clear(); + } +}; + +template _Fp EcT<_Fp>::a_; +template _Fp EcT<_Fp>::b_; +template int EcT<_Fp>::specialA_; +template bool EcT<_Fp>::compressedExpression_; + +struct EcParam { + const char *name; + const char *p; + const char *a; + const char *b; + const char *gx; + const char *gy; + const char *n; + size_t bitLen; // bit length of p +}; + +} // mcl + +namespace std { CYBOZU_NAMESPACE_TR1_BEGIN +template struct hash; + +template +struct hash > { + size_t operator()(const mcl::EcT<_Fp>& P) const + { + if (P.isZero()) return 0; + P.normalize(); + uint64_t v = hash<_Fp>()(P.x); + v = hash<_Fp>()(P.y, v); + return static_cast(v); + } +}; + +CYBOZU_NAMESPACE_TR1_END } // std diff --git a/include/mcl/ecparam.hpp b/include/mcl/ecparam.hpp new file mode 100644 index 0000000..a5206e9 --- /dev/null +++ b/include/mcl/ecparam.hpp @@ -0,0 +1,161 @@ +#pragma once +/** + @file + @brief Elliptic curve parameter + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include + +namespace mcl { namespace ecparam { + +const struct mcl::EcParam secp160k1 = { + "secp160k1", + "0xfffffffffffffffffffffffffffffffeffffac73", + "0", + "7", + "0x3b4c382ce37aa192a4019e763036f4f5dd4d7ebb", + "0x938cf935318fdced6bc28286531733c3f03c4fee", + "0x100000000000000000001b8fa16dfab9aca16b6b3", + 160 +}; +// p=2^160 + 7 +const struct mcl::EcParam p160_1 = { + "p160_1", + "0x10000000000000000000000000000000000000007", + "10", + "1343632762150092499701637438970764818528075565078", + "1", + "1236612389951462151661156731535316138439983579284", + "1461501637330902918203683518218126812711137002561", + 161 +}; +const struct mcl::EcParam secp192k1 = { + "secp192k1", + "0xfffffffffffffffffffffffffffffffffffffffeffffee37", + "0", + "3", + "0xdb4ff10ec057e9ae26b07d0280b7f4341da5d1b1eae06c7d", + "0x9b2f2f6d9c5628a7844163d015be86344082aa88d95e2f9d", + "0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d", + 192 +}; +const struct mcl::EcParam secp224k1 = { + "secp224k1", + "0xfffffffffffffffffffffffffffffffffffffffffffffffeffffe56d", + "0", + "5", + "0xa1455b334df099df30fc28a169a467e9e47075a90f7e650eb6b7a45c", + "0x7e089fed7fba344282cafbd6f7e319f7c0b0bd59e2ca4bdb556d61a5", + "0x10000000000000000000000000001dce8d2ec6184caf0a971769fb1f7", + 224 +}; +const struct mcl::EcParam secp256k1 = { + "secp256k1", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f", + "0", + "7", + "0x79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798", + "0x483ada7726a3c4655da4fbfc0e1108a8fd17b448a68554199c47d08ffb10d4b8", + "0xfffffffffffffffffffffffffffffffebaaedce6af48a03bbfd25e8cd0364141", + 256 +}; +const struct mcl::EcParam secp384r1 = { + "secp384r1", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff", + "-3", + "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef", + "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7", + "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f", + "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973", + 384 +}; +const struct mcl::EcParam secp521r1 = { + "secp521r1", + "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "-3", + "0x51953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", + "0xc6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", + "0x11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", + "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409", + 521 +}; +const struct mcl::EcParam NIST_P192 = { + "NIST_P192", + "0xfffffffffffffffffffffffffffffffeffffffffffffffff", + "-3", + "0x64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1", + "0x188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012", + "0x07192b95ffc8da78631011ed6b24cdd573f977a11e794811", + "0xffffffffffffffffffffffff99def836146bc9b1b4d22831", + 192 +}; +const struct mcl::EcParam NIST_P224 = { + "NIST_P224", + "0xffffffffffffffffffffffffffffffff000000000000000000000001", + "-3", + "0xb4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", + "0xb70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", + "0xbd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", + "0xffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3d", + 224 +}; +const struct mcl::EcParam NIST_P256 = { + "NIST_P256", + "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff", + "-3", + "0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", + "0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", + "0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", + "0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551", + 256 +}; +// same secp384r1 +const struct mcl::EcParam NIST_P384 = { + "NIST_P384", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff", + "-3", + "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef", + "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7", + "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f", + "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf581a0db248b0a77aecec196accc52973", + 384 +}; +// same secp521r1 +const struct mcl::EcParam NIST_P521 = { + "NIST_P521", + "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "-3", + "0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", + "0xc6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", + "0x11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", + "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409", + 521 +}; + +} // mcl::ecparam + +static inline const mcl::EcParam* getEcParam(const std::string& name) +{ + static const mcl::EcParam *tbl[] = { + &ecparam::secp160k1, + &ecparam::secp192k1, + &ecparam::secp224k1, + &ecparam::secp256k1, + &ecparam::secp384r1, + &ecparam::secp521r1, + + &ecparam::NIST_P192, + &ecparam::NIST_P224, + &ecparam::NIST_P256, + &ecparam::NIST_P384, + &ecparam::NIST_P521, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + if (name == tbl[i]->name) return tbl[i]; + } + throw cybozu::Exception("mcl::getEcParam:not support name") << name; +} + +} // mcl diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp new file mode 100644 index 0000000..a8b980b --- /dev/null +++ b/include/mcl/fp.hpp @@ -0,0 +1,446 @@ +#pragma once +/** + @file + @brief finite field class + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include +#include +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4127) + #ifndef NOMINMAX + #define NOMINMAX + #endif +#endif +#if defined(_WIN64) || defined(__x86_64__) +// #define USE_MONT_FP +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef MCL_FP_BLOCK_MAX_BIT_N + #define MCL_FP_BLOCK_MAX_BIT_N 521 +#endif + +namespace mcl { + +struct Block { + typedef fp::Unit Unit; + const Unit *p; // pointer to original FpT.v_ + size_t n; + static const size_t UnitByteN = sizeof(Unit); + static const size_t maxUnitN = (MCL_FP_BLOCK_MAX_BIT_N + UnitByteN * 8 - 1) / (UnitByteN * 8); + Unit v_[maxUnitN]; +}; + +template +class FpT { + typedef fp::Unit Unit; + static const size_t UnitByteN = sizeof(Unit); + static const size_t maxUnitN = (maxBitN + UnitByteN * 8 - 1) / (UnitByteN * 8); + static fp::Op op_; + static mcl::SquareRoot sq_; + static size_t pBitLen_; + template friend class FpT; + Unit v_[maxUnitN]; +public: + // return pointer to array v_[] + const Unit *getUnit() const { return v_; } + size_t getUnitN() const { return op_.N; } + typedef Unit BlockType; + void dump() const + { + const size_t N = op_.N; + for (size_t i = 0; i < N; i++) { + printf("%016llx ", (long long)v_[N - 1 - i]); + } + printf("\n"); + } + static inline void setModulo(const std::string& mstr, int base = 0) + { + bool isMinus; + mpz_class mp; + inFromStr(mp, &isMinus, mstr, base); + if (isMinus) throw cybozu::Exception("mcl:FpT:setModulo:mstr is not minus") << mstr; + pBitLen_ = Gmp::getBitLen(mp); + if (pBitLen_ > maxBitN) throw cybozu::Exception("mcl:FpT:setModulo:too large bitLen") << pBitLen_ << maxBitN; + Unit p[maxUnitN] = {}; + const size_t n = Gmp::getRaw(p, maxUnitN, mp); + if (n == 0) throw cybozu::Exception("mcl:FpT:setModulo:bad mstr") << mstr; +#ifdef USE_MONT_FP + if (pBitLen_ <= 128) { op_ = fp::MontFp::init(p); } +#if CYBOZU_OS_BIT == 32 + else if (pBitLen_ <= 160) { static fp::MontFp f; op_ = f.init(p); } +#endif + else if (pBitLen_ <= 192) { static fp::MontFp f; op_ = f.init(p); } +#if CYBOZU_OS_BIT == 32 + else if (pBitLen_ <= 224) { static fp::MontFp f; op_ = f.init(p); } +#endif + else if (pBitLen_ <= 256) { static fp::MontFp f; op_ = f.init(p); } + else if (pBitLen_ <= 384) { static fp::MontFp f; op_ = f.init(p); } + else if (pBitLen_ <= 448) { static fp::MontFp f; op_ = f.init(p); } +#if CYBOZU_OS_BIT == 32 + else if (pBitLen_ <= 544) { static fp::MontFp f; op_ = f.init(p); } +#else + else if (pBitLen_ <= 576) { static fp::MontFp f; op_ = f.init(p); } +#endif + else { static fp::MontFp f; op_ = f.init(p); } +#else + if (pBitLen_ <= 128) { op_ = fp::FixedFp::init(p); } +#if CYBOZU_OS_BIT == 32 + else if (pBitLen_ <= 160) { static fp::FixedFp f; op_ = f.init(p); } +#endif + else if (pBitLen_ <= 192) { static fp::FixedFp f; op_ = f.init(p); } +#if CYBOZU_OS_BIT == 32 + else if (pBitLen_ <= 224) { static fp::FixedFp f; op_ = f.init(p); } +#endif + else if (pBitLen_ <= 256) { static fp::FixedFp f; op_ = f.init(p); } + else if (pBitLen_ <= 384) { static fp::FixedFp f; op_ = f.init(p); } + else if (pBitLen_ <= 448) { static fp::FixedFp f; op_ = f.init(p); } +#if CYBOZU_OS_BIT == 32 + else if (pBitLen_ <= 544) { static fp::FixedFp f; op_ = f.init(p); } +#else + else if (pBitLen_ <= 576) { static fp::FixedFp f; op_ = f.init(p); } +#endif + else { static fp::FixedFp f; op_ = f.init(p); } +#endif + assert(op_.N <= maxUnitN); + sq_.set(mp); + } + static inline void getModulo(std::string& pstr) + { + Gmp::toStr(pstr, op_.mp); + } + static inline bool isYodd(const FpT& x) + { + Block b; + x.getBlock(b); + return (b.p[0] & 1) == 1; + } + static inline bool squareRoot(FpT& y, const FpT& x) + { + mpz_class mx, my; + x.toGmp(mx); + bool b = sq_.get(my, mx); + if (!b) return false; + y.fromGmp(my); + return true; + } + FpT() {} + FpT(const FpT& x) + { + op_.copy(v_, x.v_); + } + FpT& operator=(const FpT& x) + { + op_.copy(v_, x.v_); + return *this; + } + void clear() + { + op_.clear(v_); + } + FpT(int64_t x) { operator=(x); } + explicit FpT(const std::string& str, int base = 0) + { + fromStr(str, base); + } + FpT& operator=(int64_t x) + { + clear(); + if (x) { + int64_t y = x < 0 ? -x : x; + if (sizeof(Unit) == 8) { + v_[0] = y; + } else { + v_[0] = (uint32_t)y; + v_[1] = (uint32_t)(y >> 32); + } + if (x < 0) neg(*this, *this); + toMont(*this, *this); + } + return *this; + } + void toMont(FpT& y, const FpT& x) + { + if (op_.toMont) op_.toMont(y.v_, x.v_); + } + void fromMont(FpT& y, const FpT& x) + { + if (op_.fromMont) op_.fromMont(y.v_, x.v_); + } + void fromStr(const std::string& str, int base = 0) + { + bool isMinus; + mpz_class x; + inFromStr(x, &isMinus, str, base); + if (x >= op_.mp) throw cybozu::Exception("fp:FpT:fromStr:large str") << str; + fp::local::toArray(v_, op_.N, x.get_mpz_t()); + if (isMinus) { + neg(*this, *this); + } + toMont(*this, *this); + } + // alias of fromStr + void set(const std::string& str, int base = 0) { fromStr(str, base); } + template + void setRaw(const S *inBuf, size_t n) + { + const size_t byteN = sizeof(S) * n; + const size_t fpByteN = sizeof(Unit) * op_.N; + if (byteN > fpByteN) throw cybozu::Exception("setRaw:bad n") << n << fpByteN; + assert(byteN <= fpByteN); + memcpy(v_, inBuf, byteN); + memset((char *)v_ + byteN, 0, fpByteN - byteN); + if (!isValid()) throw cybozu::Exception("setRaw:large value"); + toMont(*this, *this); + } + template + size_t getRaw(S *outBuf, size_t n) const + { + const size_t byteN = sizeof(S) * n; + const size_t fpByteN = sizeof(Unit) * op_.N; + if (byteN < fpByteN) throw cybozu::Exception("getRaw:bad n") << n << fpByteN; + assert(byteN >= fpByteN); + Block b; + getBlock(b); + memcpy(outBuf, b.p, fpByteN); + const size_t writeN = (fpByteN + sizeof(S) - 1) / sizeof(S); + memset((char *)outBuf + fpByteN, 0, writeN * sizeof(S) - fpByteN); + return writeN; + } + void getBlock(Block& b) const + { + assert(maxUnitN <= Block::maxUnitN); + b.n = op_.N; + if (op_.fromMont) { + op_.fromMont(b.v_, v_); + b.p = &b.v_[0]; + } else { + b.p = &v_[0]; + } + } + template + void setRand(RG& rg) + { + fp::getRandVal(v_, rg, op_.p, pBitLen_); + fromMont(*this, *this); + } + static inline void toStr(std::string& str, const Unit *x, size_t n, int base = 10, bool withPrefix = false) + { + switch (base) { + case 10: + { + mpz_class t; + Gmp::setRaw(t, x, n); + Gmp::toStr(str, t, 10); + } + return; + case 16: + mcl::fp::toStr16(str, x, n, withPrefix); + return; + case 2: + mcl::fp::toStr2(str, x, n, withPrefix); + return; + default: + throw cybozu::Exception("fp:FpT:toStr:bad base") << base; + } + } + void toStr(std::string& str, int base = 10, bool withPrefix = false) const + { + Block b; + getBlock(b); + toStr(str, b.p, b.n, base, withPrefix); + } + std::string toStr(int base = 10, bool withPrefix = false) const + { + std::string str; + toStr(str, base, withPrefix); + return str; + } + void toGmp(mpz_class& x) const + { + Block b; + getBlock(b); + Gmp::setRaw(x, b.p, b.n); + } + mpz_class toGmp() const + { + mpz_class x; + toGmp(x); + return x; + } + void fromGmp(const mpz_class& x) + { + setRaw(Gmp::getBlock(x), Gmp::getBlockSize(x)); + } + static inline void add(FpT& z, const FpT& x, const FpT& y) { op_.add(z.v_, x.v_, y.v_); } + static inline void sub(FpT& z, const FpT& x, const FpT& y) { op_.sub(z.v_, x.v_, y.v_); } + static inline void mul(FpT& z, const FpT& x, const FpT& y) { op_.mul(z.v_, x.v_, y.v_); } + static inline void inv(FpT& y, const FpT& x) { op_.inv(y.v_, x.v_); } + static inline void neg(FpT& y, const FpT& x) { op_.neg(y.v_, x.v_); } + static inline void square(FpT& y, const FpT& x) { op_.square(y.v_, x.v_); } + static inline void div(FpT& z, const FpT& x, const FpT& y) + { + FpT rev; + inv(rev, y); + mul(z, x, rev); + } + static inline void powerArray(FpT& z, const FpT& x, const Unit *y, size_t yn) + { + FpT out(1); + FpT t(x); + for (size_t i = 0; i < yn; i++) { + const Unit v = y[i]; + int m = (int)sizeof(Unit) * 8; + if (i == yn - 1) { + while (m > 0 && (v & (Unit(1) << (m - 1))) == 0) { + m--; + } + } + for (int j = 0; j < m; j++) { + if (v & (Unit(1) << j)) { + out *= t; + } + t *= t; + } + } + z = out; + } + template + static inline void power(FpT& z, const FpT& x, const FpT& y) + { + Block b; + y.getBlock(b); + powerArray(z, x, b.p, b.n); + } + static inline void power(FpT& z, const FpT& x, int y) + { + if (y < 0) throw cybozu::Exception("FpT:power with negative y is not support") << y; + const Unit u = y; + powerArray(z, x, &u, 1); + } + static inline void power(FpT& z, const FpT& x, const mpz_class& y) + { + if (y < 0) throw cybozu::Exception("FpT:power with negative y is not support") << y; + powerArray(z, x, Gmp::getBlock(y), Gmp::getBlockSize(x)); + } + bool isZero() const { return op_.isZero(v_); } + /* + append to bv(not clear bv) + */ + void appendToBitVec(cybozu::BitVector& bv) const + { + Block b; + getBlock(b); + bv.append(b.p, pBitLen_); + } + bool isValid() const + { + return fp::local::compareArray(v_, op_.p, op_.N) < 0; + } + void fromBitVec(const cybozu::BitVector& bv) + { + if (bv.size() != pBitLen_) throw cybozu::Exception("FpT:fromBitVec:bad size") << bv.size() << pBitLen_; + setRaw(bv.getBlock(), bv.getBlockSize()); + } + static inline size_t getModBitLen() { return pBitLen_; } + static inline size_t getBitVecSize() { return pBitLen_; } + bool operator==(const FpT& rhs) const { return fp::local::isEqualArray(v_, rhs.v_, op_.N); } + bool operator!=(const FpT& rhs) const { return !operator==(rhs); } + inline friend FpT operator+(const FpT& x, const FpT& y) { FpT z; add(z, x, y); return z; } + inline friend FpT operator-(const FpT& x, const FpT& y) { FpT z; sub(z, x, y); return z; } + inline friend FpT operator*(const FpT& x, const FpT& y) { FpT z; mul(z, x, y); return z; } + inline friend FpT operator/(const FpT& x, const FpT& y) { FpT z; div(z, x, y); return z; } + FpT& operator+=(const FpT& x) { add(*this, *this, x); return *this; } + FpT& operator-=(const FpT& x) { sub(*this, *this, x); return *this; } + FpT& operator*=(const FpT& x) { mul(*this, *this, x); return *this; } + FpT& operator/=(const FpT& x) { div(*this, *this, x); return *this; } + FpT operator-() const { FpT x; neg(x, *this); return x; } + friend inline std::ostream& operator<<(std::ostream& os, const FpT& self) + { + const std::ios_base::fmtflags f = os.flags(); + if (f & std::ios_base::oct) throw cybozu::Exception("fpT:operator<<:oct is not supported"); + const int base = (f & std::ios_base::hex) ? 16 : 10; + const bool showBase = (f & std::ios_base::showbase) != 0; + std::string str; + self.toStr(str, base, showBase); + return os << str; + } + friend inline std::istream& operator>>(std::istream& is, FpT& self) + { + const std::ios_base::fmtflags f = is.flags(); + if (f & std::ios_base::oct) throw cybozu::Exception("fpT:operator>>:oct is not supported"); + const int base = (f & std::ios_base::hex) ? 16 : 0; + std::string str; + is >> str; + self.fromStr(str, base); + return is; + } + /* + not support + getBitLen, operator<, > + */ + /* + QQQ : should be removed + */ + bool operator<(const FpT&) const { return false; } + static inline int compare(const FpT& x, const FpT& y) + { + Block xb, yb; + x.getBlock(xb); + y.getBlock(yb); + return fp::local::compareArray(xb.p, yb.p, xb.n); + } +private: + static inline void inFromStr(mpz_class& x, bool *isMinus, const std::string& str, int base) + { + const char *p = fp::verifyStr(isMinus, &base, str); + if (!Gmp::fromStr(x, p, base)) { + throw cybozu::Exception("fp:FpT:inFromStr") << str; + } + } +}; + +template fp::Op FpT::op_; +template mcl::SquareRoot FpT::sq_; +template size_t FpT::pBitLen_; + +namespace power_impl { + +templateclass FpT> +void power(G& z, const G& x, const FpT& y) +{ + Block b; + y.getBlock(b); + mcl::power_impl::powerArray(z, x, b.p, b.n); +} + +} // mcl::power_impl +} // mcl + +namespace std { CYBOZU_NAMESPACE_TR1_BEGIN +template struct hash; + +template +struct hash > : public std::unary_function, size_t> { + size_t operator()(const mcl::FpT& x, uint64_t v = 0) const + { + return static_cast(cybozu::hash64(x.getUnit(), x.getUnitN(), v)); + } +}; + +CYBOZU_NAMESPACE_TR1_END } // std::tr1 + +#ifdef _WIN32 + #pragma warning(pop) +#endif diff --git a/include/mcl/fp_base.hpp b/include/mcl/fp_base.hpp new file mode 100644 index 0000000..0fb174f --- /dev/null +++ b/include/mcl/fp_base.hpp @@ -0,0 +1,527 @@ +#pragma once +/** + @file + @brief basic operation + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4616) + #pragma warning(disable : 4800) + #pragma warning(disable : 4244) + #pragma warning(disable : 4127) + #pragma warning(disable : 4512) + #pragma warning(disable : 4146) +#endif +#include +#include +#include +#ifdef _MSC_VER + #pragma warning(pop) +#endif +#include +#ifdef USE_MONT_FP +#include +#endif + +namespace mcl { namespace fp { + +#if defined(CYBOZU_OS_BIT) && (CYBOZU_OS_BIT == 32) +typedef uint32_t Unit; +#else +typedef uint64_t Unit; +#endif + +typedef void (*void1op)(Unit*); +typedef void (*void2op)(Unit*, const Unit*); +typedef void (*void3op)(Unit*, const Unit*, const Unit*); +typedef void (*void4op)(Unit*, const Unit*, const Unit*, const Unit*); +typedef int (*int2op)(Unit*, const Unit*); +typedef void (*void4Iop)(Unit*, const Unit*, const Unit*, const Unit*, Unit); + +} } // mcl::fp + +#ifdef MCL_USE_LLVM + +extern "C" { + +#define MCL_FP_DEF_FUNC(len) \ +void mcl_fp_add ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_add ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_sub ## len ## S(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_sub ## len ## L(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_mul ## len ## pre(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); \ +void mcl_fp_mont ## len(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*, mcl::fp::Unit); + +MCL_FP_DEF_FUNC(128) +MCL_FP_DEF_FUNC(192) +MCL_FP_DEF_FUNC(256) +MCL_FP_DEF_FUNC(320) +MCL_FP_DEF_FUNC(384) +MCL_FP_DEF_FUNC(448) +MCL_FP_DEF_FUNC(512) +#if CYBOZU_OS_BIT == 32 +MCL_FP_DEF_FUNC(160) +MCL_FP_DEF_FUNC(224) +MCL_FP_DEF_FUNC(288) +MCL_FP_DEF_FUNC(352) +MCL_FP_DEF_FUNC(416) +MCL_FP_DEF_FUNC(480) +MCL_FP_DEF_FUNC(544) +#else +MCL_FP_DEF_FUNC(576) +#endif + +void mcl_fp_mul_NIST_P192(mcl::fp::Unit*, const mcl::fp::Unit*, const mcl::fp::Unit*); + +} + +#endif + +namespace mcl { namespace fp { + +namespace local { + +inline int compareArray(const Unit* x, const Unit* y, size_t n) +{ + for (size_t i = n - 1; i != size_t(-1); i--) { + if (x[i] < y[i]) return -1; + if (x[i] > y[i]) return 1; + } + return 0; +} + +inline bool isEqualArray(const Unit* x, const Unit* y, size_t n) +{ + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} + +inline bool isZeroArray(const Unit *x, size_t n) +{ + for (size_t i = 0; i < n; i++) { + if (x[i]) return false; + } + return true; +} + +inline void clearArray(Unit *x, size_t begin, size_t end) +{ + for (size_t i = begin; i < end; i++) x[i] = 0; +} + +inline void copyArray(Unit *y, const Unit *x, size_t n) +{ + for (size_t i = 0; i < n; i++) y[i] = x[i]; +} + +inline void toArray(Unit *y, size_t yn, const mpz_srcptr x) +{ + const int xn = x->_mp_size; + assert(xn >= 0); + const Unit* xp = (const Unit*)x->_mp_d; + assert(xn <= (int)yn); + copyArray(y, xp, xn); + clearArray(y, xn, yn); +} + +} // mcl::fp +struct TagDefault; + +struct Op { + mpz_class mp; + const Unit* p; + size_t N; + bool (*isZero)(const Unit*); + void1op clear; + void2op neg; + void2op inv; + void2op square; + void2op copy; + void3op add; + void3op sub; + void3op mul; + // for Montgomery + void2op toMont; + void2op fromMont; + Op() + : p(0), N(0), isZero(0), clear(0), neg(0), inv(0) + , square(0), copy(0),add(0), sub(0), mul(0), toMont(0), fromMont(0) + { + } +}; + +template +struct FixedFp { + typedef fp::Unit Unit; + static const size_t N = (bitN + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8); + static mpz_class mp_; + static Unit p_[N]; + static inline void setModulo(const Unit* p) + { + assert(N >= 2); + assert(sizeof(mp_limb_t) == sizeof(Unit)); + copy(p_, p); + Gmp::setRaw(mp_, p, N); + } + static inline void set_mpz_t(mpz_t& z, const Unit* p, int n = (int)N) + { + z->_mp_alloc = n; + int i = n; + while (i > 0 && p[i - 1] == 0) { + i--; + } + z->_mp_size = i; + z->_mp_d = (mp_limb_t*)const_cast(p); + } + static inline void set_zero(mpz_t& z, Unit *p, size_t n) + { + z->_mp_alloc = (int)n; + z->_mp_size = 0; + z->_mp_d = (mp_limb_t*)p; + } + static inline void clear(Unit *x) + { + local::clearArray(x, 0, N); + } + static inline void copy(Unit *y, const Unit *x) + { + local::copyArray(y, x, N); + } + static inline void add(Unit *z, const Unit *x, const Unit *y) + { + Unit ret[N + 2]; // not N + 1 + mpz_t mz, mx, my; + set_zero(mz, ret, N + 2); + set_mpz_t(mx, x); + set_mpz_t(my, y); + mpz_add(mz, mx, my); + if (mpz_cmp(mz, mp_.get_mpz_t()) >= 0) { + mpz_sub(mz, mz, mp_.get_mpz_t()); + } + local::toArray(z, N, mz); + } +#ifdef MCL_USE_LLVM +#if CYBOZU_OS_BIT == 64 + static inline void add128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add128S(z, x, y, p_); } + static inline void sub128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub128S(z, x, y, p_); } + static inline void add192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add192S(z, x, y, p_); } + static inline void sub192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub192S(z, x, y, p_); } + static inline void add256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add256S(z, x, y, p_); } + static inline void sub256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub256S(z, x, y, p_); } + static inline void add384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add384L(z, x, y, p_); } + static inline void sub384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub384L(z, x, y, p_); } + + static inline void add576(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add576L(z, x, y, p_); } + static inline void sub576(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub576L(z, x, y, p_); } +#else + static inline void add128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add128S(z, x, y, p_); } + static inline void sub128(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub128S(z, x, y, p_); } + static inline void add192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add192L(z, x, y, p_); } + static inline void sub192(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub192L(z, x, y, p_); } + static inline void add256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add256L(z, x, y, p_); } + static inline void sub256(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub256L(z, x, y, p_); } + static inline void add384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add384L(z, x, y, p_); } + static inline void sub384(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub384L(z, x, y, p_); } + + static inline void add160(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add160L(z, x, y, p_); } + static inline void sub160(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub160L(z, x, y, p_); } + static inline void add224(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add224L(z, x, y, p_); } + static inline void sub224(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub224L(z, x, y, p_); } + static inline void add544(Unit *z, const Unit *x, const Unit *y) { mcl_fp_add544L(z, x, y, p_); } + static inline void sub544(Unit *z, const Unit *x, const Unit *y) { mcl_fp_sub544L(z, x, y, p_); } +#endif +#endif + static inline void sub(Unit *z, const Unit *x, const Unit *y) + { + Unit ret[N + 1]; + mpz_t mz, mx, my; + set_zero(mz, ret, N + 1); + set_mpz_t(mx, x); + set_mpz_t(my, y); + mpz_sub(mz, mx, my); + if (mpz_sgn(mz) < 0) { + mpz_add(mz, mz, mp_.get_mpz_t()); + } + local::toArray(z, N, mz); + } + static inline void mul(Unit *z, const Unit *x, const Unit *y) + { + Unit ret[N * 2]; +#ifdef MCL_USE_LLVM +#if CYBOZU_OS_BIT == 64 + if (bitN <= 128) { mcl_fp_mul128pre(ret, x, y); mod(z, ret); return; } + if (bitN <= 192) { mcl_fp_mul192pre(ret, x, y); mod(z, ret); return; } + if (bitN <= 256) { mcl_fp_mul256pre(ret, x, y); mod(z, ret); return; } + if (bitN <= 384) { mcl_fp_mul384pre(ret, x, y); mod(z, ret); return; } +// if (bitN <= 576) { mcl_fp_mul576pre(ret, x, y); mod(z, ret); return; } +#else + if (bitN <= 128) { mcl_fp_mul128pre(ret, x, y); mod(z, ret); return; } + if (bitN <= 160) { mcl_fp_mul160pre(ret, x, y); mod(z, ret); return; } + if (bitN <= 192) { mcl_fp_mul192pre(ret, x, y); mod(z, ret); return; } + if (bitN <= 224) { mcl_fp_mul224pre(ret, x, y); mod(z, ret); return; } +// if (bitN <= 256) { mcl_fp_mul256pre(ret, x, y); mod(z, ret); return; } +// if (bitN <= 384) { mcl_fp_mul384pre(ret, x, y); mod(z, ret); return; } +// if (bitN <= 544) { mcl_fp_mul544pre(ret, x, y); mod(z, ret); return; } +#endif +#endif +#if 0 + pre_mul(ret, x, y); + mod(z, ret); +#else + mpz_t mx, my, mz; + set_zero(mz, ret, N * 2); + set_mpz_t(mx, x); + set_mpz_t(my, y); + mpz_mul(mz, mx, my); + mpz_mod(mz, mz, mp_.get_mpz_t()); + local::toArray(z, N, mz); +#endif + } + static inline void pre_mul(Unit *z, const Unit *x, const Unit *y) + { + mpz_t mx, my, mz; + set_zero(mz, z, N * 2); + set_mpz_t(mx, x); + set_mpz_t(my, y); + mpz_mul(mz, mx, my); + local::toArray(z, N * 2, mz); + } + // x[N * 2] -> y[N] + static inline void mod(Unit *y, const Unit *x) + { + mpz_t mx, my; + set_mpz_t(mx, x, N * 2); + set_mpz_t(my, y, N); + mpz_mod(my, mx, mp_.get_mpz_t()); + local::clearArray(y, my->_mp_size, N); + } + static inline void square(Unit *z, const Unit *x) + { + mul(z, x, x); // QQQ : use powMod with 2? + } + static inline void inv(Unit *y, const Unit *x) + { + mpz_class my; + mpz_t mx; + set_mpz_t(mx, x); + mpz_invert(my.get_mpz_t(), mx, mp_.get_mpz_t()); + local::toArray(y, N, my.get_mpz_t()); + } + static inline bool isZero(const Unit *x) + { + return local::isZeroArray(x, N); + } + static inline void neg(Unit *y, const Unit *x) + { + if (isZero(x)) { + if (x != y) clear(y); + return; + } + sub(y, p_, x); + } + static inline Op init(const Unit *p) + { + setModulo(p); + Op op; + op.N = N; + op.isZero = &isZero; + op.clear = &clear; + op.neg = &neg; + op.inv = &inv; + op.square = □ + op.copy = © +#ifdef MCL_USE_LLVM + printf("fp2 use llvm bitN=%zd\n", bitN); + if (bitN <= 128) { + op.add = &add128; + op.sub = &sub128; + } else +#if CYBOZU_OS_BIT == 32 + if (bitN <= 160) { + op.add = &add160; + op.sub = &sub160; + } else +#endif + if (bitN <= 192) { + op.add = &add192; + op.sub = &sub192; + } else +#if CYBOZU_OS_BIT == 32 + if (bitN <= 224) { + op.add = &add224; + op.sub = &sub224; + } else +#endif + if (bitN <= 256) { + op.add = &add256; + op.sub = &sub256; + } else + if (bitN <= 384) { + op.add = &add384; + op.sub = &sub384; + } else +#if CYBOZU_OS_BIT == 64 + if (bitN <= 576) { + op.add = &add576; + op.sub = &sub576; + } else +#else + if (bitN <= 544) { + op.add = &add544; + op.sub = &sub544; + } else +#endif +#endif + { + op.add = &add; + op.sub = ⊂ + } +#ifdef MCL_USE_LLVM + if (mp_ == mpz_class("0xfffffffffffffffffffffffffffffffeffffffffffffffff")) { + op.mul = &mcl_fp_mul_NIST_P192; // slower than MontFp192 + } else +#endif + { + op.mul = &mul; + } + op.mp = mp_; + op.p = &p_[0]; + return op; + } +}; + +template mpz_class FixedFp::mp_; +template fp::Unit FixedFp::p_[FixedFp::N]; + +#ifdef USE_MONT_FP +template +struct MontFp { + typedef fp::Unit Unit; + static const size_t N = (bitN + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8); + static const size_t invTblN = N * sizeof(Unit) * 8 * 2; + static mpz_class mp_; +// static mcl::SquareRoot sq_; + static Unit p_[N]; + static Unit one_[N]; + static Unit R_[N]; // (1 << (N * 64)) % p + static Unit RR_[N]; // (R * R) % p + static Unit invTbl_[invTblN][N]; + static size_t modBitLen_; + static FpGenerator fg_; + static void3op add_; + static void3op mul_; + + static inline void fromRawGmp(Unit *y, const mpz_class& x) + { + local::toArray(y, N, x.get_mpz_t()); + } + static inline void setModulo(const Unit *p) + { + copy(p_, p); + Gmp::setRaw(mp_, p, N); +// sq_.set(pOrg_); + + mpz_class t = 1; + fromRawGmp(one_, t); + t = (t << (N * 64)) % mp_; + fromRawGmp(R_, t); + t = (t * t) % mp_; + fromRawGmp(RR_, t); + fg_.init(p_, N); + + add_ = Xbyak::CastTo(fg_.add_); + mul_ = Xbyak::CastTo(fg_.mul_); + } + static void initInvTbl(Unit invTbl[invTblN][N]) + { + Unit t[N]; + clear(t); + t[0] = 2; + toMont(t, t); + for (int i = 0; i < invTblN; i++) { + copy(invTbl[invTblN - 1 - i], t); + add_(t, t, t); + } + } + static inline void clear(Unit *x) + { + local::clearArray(x, 0, N); + } + static inline void copy(Unit *y, const Unit *x) + { + local::copyArray(y, x, N); + } + static inline bool isZero(const Unit *x) + { + return local::isZeroArray(x, N); + } + static inline void invC(Unit *y, const Unit *x) + { + const int2op preInv = Xbyak::CastTo(fg_.preInv_); + Unit r[N]; + int k = preInv(r, x); + /* + xr = 2^k + R = 2^(N * 64) + get r2^(-k)R^2 = r 2^(N * 64 * 2 - k) + */ + mul_(y, r, invTbl_[k]); + } + static inline void squareC(Unit *y, const Unit *x) + { + mul_(y, x, x); + } + static inline void toMont(Unit *y, const Unit *x) + { + mul_(y, x, RR_); + } + static inline void fromMont(Unit *y, const Unit *x) + { + mul_(y, x, one_); + } + static inline Op init(const Unit *p) + { +puts("use MontFp2"); + setModulo(p); + Op op; + op.N = N; + op.isZero = &isZero; + op.clear = &clear; + op.neg = Xbyak::CastTo(fg_.neg_); + op.inv = &invC; + op.square = Xbyak::CastTo(fg_.sqr_); + if (op.square == 0) op.square = &squareC; + op.copy = © + op.add = add_; + op.sub = Xbyak::CastTo(fg_.sub_); + op.mul = mul_; + op.mp = mp_; + op.p = &p_[0]; + op.toMont = &toMont; + op.fromMont = &fromMont; + +// shr1 = Xbyak::CastTo(fg_.shr1_); +// addNc = Xbyak::CastTo(fg_.addNc_); +// subNc = Xbyak::CastTo(fg_.subNc_); + initInvTbl(invTbl_); + return op; + } +}; +template mpz_class MontFp::mp_; +template fp::Unit MontFp::p_[MontFp::N]; +template fp::Unit MontFp::one_[MontFp::N]; +template fp::Unit MontFp::R_[MontFp::N]; +template fp::Unit MontFp::RR_[MontFp::N]; +template fp::Unit MontFp::invTbl_[MontFp::invTblN][MontFp::N]; +template size_t MontFp::modBitLen_; +template FpGenerator MontFp::fg_; +template void3op MontFp::add_; +template void3op MontFp::mul_; +#endif + +} } // mcl::fp diff --git a/include/mcl/fp_generator.hpp b/include/mcl/fp_generator.hpp new file mode 100644 index 0000000..9820ca9 --- /dev/null +++ b/include/mcl/fp_generator.hpp @@ -0,0 +1,1675 @@ +#pragma once +/** + @file + @brief Fp generator + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include +#include +#include + +namespace mcl { + +namespace montgomery { + +/* + get pp such that p * pp = -1 mod M, + where p is prime and M = 1 << 64(or 32). + @param pLow [in] p mod M + T is uint32_t or uint64_t +*/ +template +T getCoff(T pLow) +{ + T ret = 0; + T t = 0; + T x = 1; + + for (size_t i = 0; i < sizeof(T) * 8; i++) { + if ((t & 1) == 0) { + t += pLow; + ret += x; + } + t >>= 1; + x <<= 1; + } + return ret; +} + +} } // mcl::montgomery + +#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64) + +#ifndef XBYAK_NO_OP_NAMES + #define XBYAK_NO_OP_NAMES +#endif +#include +#include + +namespace mcl { + +namespace fp_gen_local { + +class MemReg { + const Xbyak::Reg64 *r_; + const Xbyak::RegExp *m_; + size_t offset_; +public: + MemReg(const Xbyak::Reg64 *r, const Xbyak::RegExp *m, size_t offset) : r_(r), m_(m), offset_(offset) {} + bool isReg() const { return r_ != 0; } + const Xbyak::Reg64& getReg() const { return *r_; } + Xbyak::RegExp getMem() const { return *m_ + offset_ * sizeof(size_t); } +}; + +struct MixPack { + static const size_t useAll = 100; + Xbyak::util::Pack p; + Xbyak::RegExp m; + size_t mn; + MixPack() : mn(0) {} + MixPack(Xbyak::util::Pack& remain, size_t& rspPos, size_t n, size_t useRegNum = useAll) + { + init(remain, rspPos, n, useRegNum); + } + void init(Xbyak::util::Pack& remain, size_t& rspPos, size_t n, size_t useRegNum = useAll) + { + size_t pn = std::min(remain.size(), n); + if (useRegNum != useAll && useRegNum < pn) pn = useRegNum; + this->mn = n - pn; + this->m = Xbyak::util::rsp + rspPos; + this->p = remain.sub(0, pn); + remain = remain.sub(pn); + rspPos += mn * 8; + } + size_t size() const { return p.size() + mn; } + bool isReg(size_t n) const { return n < p.size(); } + const Xbyak::Reg64& getReg(size_t n) const + { + assert(n < p.size()); + return p[n]; + } + Xbyak::RegExp getMem(size_t n) const + { + const size_t pn = p.size(); + assert(pn <= n && n < size()); + return m + (int)((n - pn) * sizeof(size_t)); + } + MemReg operator[](size_t n) const + { + const size_t pn = p.size(); + return MemReg((n < pn) ? &p[n] : 0, (n < pn) ? 0 : &m, n - pn); + } + void removeLast() + { + if (!size()) throw cybozu::Exception("MixPack:removeLast:empty"); + if (mn > 0) { + mn--; + } else { + p = p.sub(0, p.size() - 1); + } + } + /* + replace Mem with r if possible + */ + bool replaceMemWith(Xbyak::CodeGenerator *code, const Xbyak::Reg64& r) + { + if (mn == 0) return false; + p.append(r); + code->mov(r, code->ptr [m]); + m = m + 8; + mn--; + return true; + } +}; + +} // fp_gen_local + +/* + op(r, rm); + r : reg + rm : Reg/Mem +*/ +#define MCL_FP_GEN_OP_RM(op, r, rm) \ +if (rm.isReg()) { \ + op(r, rm.getReg()); \ +} else { \ + op(r, qword [rm.getMem()]); \ +} + +/* + op(rm, r); + rm : Reg/Mem + r : reg +*/ +#define MCL_FP_GEN_OP_MR(op, rm, r) \ +if (rm.isReg()) { \ + op(rm.getReg(), r); \ +} else { \ + op(qword [rm.getMem()], r); \ +} + +struct FpGenerator : Xbyak::CodeGenerator { + typedef Xbyak::RegExp RegExp; + typedef Xbyak::Reg64 Reg64; + typedef Xbyak::Xmm Xmm; + typedef Xbyak::Operand Operand; + typedef Xbyak::util::StackFrame StackFrame; + typedef Xbyak::util::Pack Pack; + typedef fp_gen_local::MixPack MixPack; + typedef fp_gen_local::MemReg MemReg; + static const int UseRDX = Xbyak::util::UseRDX; + static const int UseRCX = Xbyak::util::UseRCX; + Xbyak::util::Cpu cpu_; + bool useMulx_; + const uint64_t *p_; + uint64_t pp_; + int pn_; + bool isFullBit_; + // add/sub without carry. return true if overflow + typedef bool (*bool3op)(uint64_t*, const uint64_t*, const uint64_t*); + + // add/sub with mod + typedef void (*void3op)(uint64_t*, const uint64_t*, const uint64_t*); + + // mul without carry. return top of z + typedef uint64_t (*uint3opI)(uint64_t*, const uint64_t*, uint64_t); + + // neg + typedef void (*void2op)(uint64_t*, const uint64_t*); + + // preInv + typedef int (*int2op)(uint64_t*, const uint64_t*); + bool3op addNc_; + bool3op subNc_; + void3op add_; + void3op sub_; + void3op mul_; + uint3opI mulI_; + void2op sqr_; + void2op neg_; + void2op shr1_; + int2op preInv_; + FpGenerator() + : CodeGenerator(4096 * 8) + , p_(0) + , pp_(0) + , pn_(0) + , isFullBit_(0) + , addNc_(0) + , subNc_(0) + , add_(0) + , sub_(0) + , mul_(0) + , mulI_(0) + , neg_(0) + , shr1_(0) + , preInv_(0) + { + useMulx_ = cpu_.has(Xbyak::util::Cpu::tBMI2); + } + /* + @param p [in] pointer to prime + @param pn [in] length of prime + */ + void init(const uint64_t *p, int pn) + { + if (pn < 2) throw cybozu::Exception("mcl:FpGenerator:small pn") << pn; + p_ = p; + pp_ = montgomery::getCoff(p[0]); + pn_ = pn; + isFullBit_ = (p_[pn_ - 1] >> 63) != 0; +// printf("p=%p, pn_=%d, isFullBit_=%d\n", p_, pn_, isFullBit_); + + setSize(0); // reset code + align(16); + addNc_ = getCurr(); + gen_addSubNc(true); + align(16); + subNc_ = getCurr(); + gen_addSubNc(false); + align(16); + add_ = getCurr(); + gen_addMod(); + align(16); + sub_ = getCurr(); + gen_sub(); + align(16); + neg_ = getCurr(); + gen_neg(); + align(16); + mulI_ = getCurr(); + gen_mulI(); + align(16); + mul_ = getCurr(); + gen_mul(); + align(16); + sqr_ = getCurr(); + if (!gen_sqr()) { + sqr_ = 0; + } + align(16); + shr1_ = getCurr(); + gen_shr1(); + preInv_ = getCurr(); + gen_preInv(); + } + void gen_addSubNc(bool isAdd) + { + StackFrame sf(this, 3); + if (isAdd) { + gen_raw_add(sf.p[0], sf.p[1], sf.p[2], rax); + } else { + gen_raw_sub(sf.p[0], sf.p[1], sf.p[2], rax); + } + setc(al); + movzx(eax, al); + } + /* + pz[] = px[] + py[] + */ + void gen_raw_add(const RegExp& pz, const RegExp& px, const RegExp& py, const Reg64& t) + { + mov(t, ptr [px]); + add(t, ptr [py]); + mov(ptr [pz], t); + for (int i = 1; i < pn_; i++) { + mov(t, ptr [px + i * 8]); + adc(t, ptr [py + i * 8]); + mov(ptr [pz + i * 8], t); + } + } + /* + pz[] = px[] - py[] + */ + void gen_raw_sub(const RegExp& pz, const RegExp& px, const RegExp& py, const Reg64& t) + { + mov(t, ptr [px]); + sub(t, ptr [py]); + mov(ptr [pz], t); + for (int i = 1; i < pn_; i++) { + mov(t, ptr [px + i * 8]); + sbb(t, ptr [py + i * 8]); + mov(ptr [pz + i * 8], t); + } + } + /* + pz[] = -px[] + */ + void gen_raw_neg(const RegExp& pz, const RegExp& px, const Reg64& t0, const Reg64& t1) + { + inLocalLabel(); + mov(t0, ptr [px]); + test(t0, t0); + jnz(".neg"); + if (pn_ > 1) { + for (int i = 1; i < pn_; i++) { + or_(t0, ptr [px + i * 8]); + } + jnz(".neg"); + } + // zero + for (int i = 0; i < pn_; i++) { + mov(ptr [pz + i * 8], t0); + } + jmp(".exit"); + L(".neg"); + mov(t1, (size_t)p_); + gen_raw_sub(pz, t1, px, t0); + L(".exit"); + outLocalLabel(); + } + /* + (rdx:pz[0..n-1]) = px[0..n-1] * y + use t, rax, rdx + if n > 2 + use + wk[0] if useMulx_ + wk[0..n-2] otherwise + */ + void gen_raw_mulI(const RegExp& pz, const RegExp& px, const Reg64& y, const MixPack& wk, const Reg64& t, size_t n) + { + assert(n >= 2); + if (n == 2) { + mov(rax, ptr [px]); + mul(y); + mov(ptr [pz], rax); + mov(t, rdx); + mov(rax, ptr [px + 8]); + mul(y); + add(rax, t); + adc(rdx, 0); + mov(ptr [pz + 8], rax); + return; + } + if (useMulx_) { + assert(wk.size() > 0 && wk.isReg(0)); + const Reg64& t1 = wk.getReg(0); + // mulx(H, L, x) = [H:L] = x * rdx + mov(rdx, y); + mulx(t1, rax, ptr [px]); // [y:rax] = px * y + mov(ptr [pz], rax); + const Reg64 *pt0 = &t; + const Reg64 *pt1 = &t1; + for (size_t i = 1; i < n - 1; i++) { + mulx(*pt0, rax, ptr [px + i * 8]); + if (i == 1) { + add(rax, *pt1); + } else { + adc(rax, *pt1); + } + mov(ptr [pz + i * 8], rax); + std::swap(pt0, pt1); + } + mulx(rdx, rax, ptr [px + (n - 1) * 8]); + adc(rax, *pt1); + mov(ptr [pz + (n - 1) * 8], rax); + adc(rdx, 0); + return; + } + assert(wk.size() >= n - 1); + for (size_t i = 0; i < n; i++) { + mov(rax, ptr [px + i * 8]); + mul(y); + if (i < n - 1) { + mov(ptr [pz + i * 8], rax); + g_mov(wk[i], rdx); + } + } + for (size_t i = 1; i < n - 1; i++) { + mov(t, ptr [pz + i * 8]); + if (i == 1) { + g_add(t, wk[i - 1]); + } else { + g_adc(t, wk[i - 1]); + } + mov(ptr [pz + i * 8], t); + } + g_adc(rax, wk[n - 2]); + mov(ptr [pz + (n - 1) * 8], rax); + adc(rdx, 0); + } + void gen_mulI() + { + assert(pn_ >= 2); + const int regNum = useMulx_ ? 2 : (1 + std::min(pn_ - 1, 8)); + const int stackSize = useMulx_ ? 0 : (pn_ - 1) * 8; + StackFrame sf(this, 3, regNum | UseRDX, stackSize); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + const Reg64& y = sf.p[2]; + size_t rspPos = 0; + Pack remain = sf.t.sub(1); + MixPack wk(remain, rspPos, pn_ - 1); + gen_raw_mulI(pz, px, y, wk, sf.t[0], pn_); + mov(rax, rdx); + } + /* + pz[] = px[] + */ + void gen_mov(const RegExp& pz, const RegExp& px, const Reg64& t, int n) + { + for (int i = 0; i < n; i++) { + mov(t, ptr [px + i * 8]); + mov(ptr [pz + i * 8], t); + } + } + void gen_addMod3() + { + StackFrame sf(this, 3, 7); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + const Reg64& py = sf.p[2]; + + const Reg64& t0 = sf.t[0]; + const Reg64& t1 = sf.t[1]; + const Reg64& t2 = sf.t[2]; + const Reg64& t3 = sf.t[3]; + const Reg64& t4 = sf.t[4]; + const Reg64& t5 = sf.t[5]; + const Reg64& t6 = sf.t[6]; + + xor_(t6, t6); + load_rm(Pack(t2, t1, t0), px); + add_rm(Pack(t2, t1, t0), py); + mov_rr(Pack(t5, t4, t3), Pack(t2, t1, t0)); + adc(t6, 0); + mov(rax, (size_t)p_); + sub_rm(Pack(t5, t4, t3), rax); + sbb(t6, 0); + cmovc(t5, t2); + cmovc(t4, t1); + cmovc(t3, t0); + store_mr(pz, Pack(t5, t4, t3)); + } + void gen_subMod_le4(int n) + { + assert(2 <= n && n <= 4); + StackFrame sf(this, 3, (n - 1) * 2); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + const Reg64& py = sf.p[2]; + + Pack rx = sf.t.sub(0, n - 1); + rx.append(px); // rx = [px, t1, t0] + Pack ry = sf.t.sub(n - 1, n - 1); + ry.append(rax); // ry = [rax, t3, t2] + + load_rm(rx, px); // destroy px + sub_rm(rx, py); +#if 0 + sbb(ry[0], ry[0]); // rx[0] = (x > y) ? 0 : -1 + for (int i = 1; i < n; i++) mov(ry[i], ry[0]); + mov(py, (size_t)p_); + for (int i = 0; i < n; i++) and_(ry[i], qword [py + 8 * i]); + add_rr(rx, ry); +#else + // a little faster + sbb(py, py); // py = (x > y) ? 0 : -1 + mov(rax, (size_t)p_); + load_rm(ry, rax); // destroy rax + for (size_t i = 0; i < ry.size(); i++) { + and_(ry[i], py); + } + add_rr(rx, ry); +#endif + store_mr(pz, rx); + } + void gen_addMod() + { + if (pn_ == 3) { + gen_addMod3(); + return; + } + StackFrame sf(this, 3, 0, pn_ * 8); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + const Reg64& py = sf.p[2]; + const Xbyak::CodeGenerator::LabelType jmpMode = pn_ < 5 ? T_AUTO : T_NEAR; + + inLocalLabel(); + gen_raw_add(pz, px, py, rax); + mov(px, (size_t)p_); // destroy px + if (isFullBit_) { + jc(".over", jmpMode); + } +#ifdef MCL_USE_JMP + for (int i = 0; i < pn_; i++) { + mov(py, ptr [pz + (pn_ - 1 - i) * 8]); // destroy py + cmp(py, ptr [px + (pn_ - 1 - i) * 8]); + jc(".exit", jmpMode); + jnz(".over", jmpMode); + } + L(".over"); + gen_raw_sub(pz, pz, px, rax); + L(".exit"); +#else + gen_raw_sub(rsp, pz, px, rax); + jc(".exit", jmpMode); + gen_mov(pz, rsp, rax, pn_); + if (isFullBit_) { + jmp(".exit", jmpMode); + L(".over"); + gen_raw_sub(pz, pz, px, rax); + } + L(".exit"); +#endif + outLocalLabel(); + } + void gen_sub() + { + if (pn_ <= 4) { + gen_subMod_le4(pn_); + return; + } + StackFrame sf(this, 3); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + const Reg64& py = sf.p[2]; + const Xbyak::CodeGenerator::LabelType jmpMode = pn_ < 5 ? T_AUTO : T_NEAR; + + inLocalLabel(); + gen_raw_sub(pz, px, py, rax); + jnc(".exit", jmpMode); + mov(px, (size_t)p_); + gen_raw_add(pz, pz, px, rax); + L(".exit"); + outLocalLabel(); + } + void gen_neg() + { + StackFrame sf(this, 2, 2); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + gen_raw_neg(pz, px, sf.t[0], sf.t[1]); + } + void gen_shr1() + { + const int c = 1; + StackFrame sf(this, 2, 1); + const Reg64 *t0 = &rax; + const Reg64 *t1 = &sf.t[0]; + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + mov(*t0, ptr [px]); + for (int i = 0; i < pn_ - 1; i++) { + mov(*t1, ptr [px + 8 * (i + 1)]); + shrd(*t0, *t1, c); + mov(ptr [pz + i * 8], *t0); + std::swap(t0, t1); + } + shr(*t0, c); + mov(ptr [pz + (pn_ - 1) * 8], *t0); + } + void gen_mul() + { + if (pn_ == 3) { + gen_montMul3(p_, pp_); + } else if (pn_ == 4) { + gen_montMul4(p_, pp_); + } else if (pn_ <= 9) { + gen_montMulN(p_, pp_, pn_); + } else { + throw cybozu::Exception("mcl:FpGenerator:gen_mul:not implemented for") << pn_; + } + } + bool gen_sqr() + { + if (pn_ == 3) { + gen_montSqr3(p_, pp_); + return true; + } + return false; + } + /* + input (pz[], px[], py[]) + z[] <- montgomery(x[], y[]) + */ + void gen_montMulN(const uint64_t *p, uint64_t pp, int n) + { + assert(2 <= pn_ && pn_ <= 9); + const int regNum = useMulx_ ? 4 : 3 + std::min(n - 1, 7); + const int stackSize = (n * 3 + (isFullBit_ ? 2 : 1)) * 8; + StackFrame sf(this, 3, regNum | UseRDX, stackSize); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; + const Reg64& py = sf.p[2]; + const Reg64& y = sf.t[0]; + const Reg64& pAddr = sf.t[1]; + const Reg64& t = sf.t[2]; + Pack remain = sf.t.sub(3); + size_t rspPos = 0; + + MixPack pw1(remain, rspPos, n - 1); + const RegExp pw2 = rsp + rspPos; // pw2[0..n-1] + const RegExp pc = pw2 + n * 8; // pc[0..n+1] + mov(pAddr, (size_t)p); + + for (int i = 0; i < n; i++) { + mov(y, ptr [py + i * 8]); + montgomeryN_1(pp, n, pc, px, y, pAddr, t, pw1, pw2, i == 0); + } + // pz[] = pc[] - p[] + gen_raw_sub(pz, pc, pAddr, t); + if (isFullBit_) sbb(qword[pc + n * 8], 0); + jnc("@f"); + for (int i = 0; i < n; i++) { + mov(t, ptr [pc + i * 8]); + mov(ptr [pz + i * 8], t); + } + L("@@"); + } + /* + input (z, x, y) = (p0, p1, p2) + z[0..3] <- montgomery(x[0..3], y[0..3]) + destroy gt0, ..., gt9, xm0, xm1, p2 + */ + void gen_montMul4(const uint64_t *p, uint64_t pp) + { + StackFrame sf(this, 3, 10 | UseRDX); + const Reg64& p0 = sf.p[0]; + const Reg64& p1 = sf.p[1]; + const Reg64& p2 = sf.p[2]; + + const Reg64& t0 = sf.t[0]; + const Reg64& t1 = sf.t[1]; + const Reg64& t2 = sf.t[2]; + const Reg64& t3 = sf.t[3]; + const Reg64& t4 = sf.t[4]; + const Reg64& t5 = sf.t[5]; + const Reg64& t6 = sf.t[6]; + const Reg64& t7 = sf.t[7]; + const Reg64& t8 = sf.t[8]; + const Reg64& t9 = sf.t[9]; + + movq(xm0, p0); // save p0 + mov(p0, (uint64_t)p); + movq(xm1, p2); + mov(p2, ptr [p2]); + montgomery4_1(pp, t0, t7, t3, t2, t1, p1, p2, p0, t4, t5, t6, t8, t9, true, xm2); + + movq(p2, xm1); + mov(p2, ptr [p2 + 8]); + montgomery4_1(pp, t1, t0, t7, t3, t2, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2); + + movq(p2, xm1); + mov(p2, ptr [p2 + 16]); + montgomery4_1(pp, t2, t1, t0, t7, t3, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2); + + movq(p2, xm1); + mov(p2, ptr [p2 + 24]); + montgomery4_1(pp, t3, t2, t1, t0, t7, p1, p2, p0, t4, t5, t6, t8, t9, false, xm2); + // [t7:t3:t2:t1:t0] + + mov(t4, t0); + mov(t5, t1); + mov(t6, t2); + mov(rdx, t3); + sub_rm(Pack(t3, t2, t1, t0), p0); + if (isFullBit_) sbb(t7, 0); + cmovc(t0, t4); + cmovc(t1, t5); + cmovc(t2, t6); + cmovc(t3, rdx); + + movq(p0, xm0); // load p0 + store_mr(p0, Pack(t3, t2, t1, t0)); + } + /* + input (z, x, y) = (p0, p1, p2) + z[0..2] <- montgomery(x[0..2], y[0..2]) + destroy gt0, ..., gt9, xm0, xm1, p2 + */ + void gen_montMul3(const uint64_t *p, uint64_t pp) + { + StackFrame sf(this, 3, 10 | UseRDX); + const Reg64& p0 = sf.p[0]; + const Reg64& p1 = sf.p[1]; + const Reg64& p2 = sf.p[2]; + + const Reg64& t0 = sf.t[0]; + const Reg64& t1 = sf.t[1]; + const Reg64& t2 = sf.t[2]; + const Reg64& t3 = sf.t[3]; + const Reg64& t4 = sf.t[4]; + const Reg64& t5 = sf.t[5]; + const Reg64& t6 = sf.t[6]; + const Reg64& t7 = sf.t[7]; + const Reg64& t8 = sf.t[8]; + const Reg64& t9 = sf.t[9]; + + movq(xm0, p0); // save p0 + mov(t7, (uint64_t)p); + mov(t9, ptr [p2]); + // c3, c2, c1, c0, px, y, p, + montgomery3_1(pp, t0, t3, t2, t1, p1, t9, t7, t4, t5, t6, t8, p0, true); + mov(t9, ptr [p2 + 8]); + montgomery3_1(pp, t1, t0, t3, t2, p1, t9, t7, t4, t5, t6, t8, p0, false); + + mov(t9, ptr [p2 + 16]); + montgomery3_1(pp, t2, t1, t0, t3, p1, t9, t7, t4, t5, t6, t8, p0, false); + + // [(t3):t2:t1:t0] + mov(t4, t0); + mov(t5, t1); + mov(t6, t2); + sub_rm(Pack(t2, t1, t0), t7); + if (isFullBit_) sbb(t3, 0); + cmovc(t0, t4); + cmovc(t1, t5); + cmovc(t2, t6); + movq(p0, xm0); + store_mr(p0, Pack(t2, t1, t0)); + } + /* + input (pz, px) + z[0..2] <- montgomery(px[0..2], px[0..2]) + destroy gt0, ..., gt9, xm0, xm1, p2 + */ + void gen_montSqr3(const uint64_t *p, uint64_t pp) + { + StackFrame sf(this, 3, 10 | UseRDX, 16 * 3); + const Reg64& pz = sf.p[0]; + const Reg64& px = sf.p[1]; +// const Reg64& py = sf.p[2]; // not used + + const Reg64& t0 = sf.t[0]; + const Reg64& t1 = sf.t[1]; + const Reg64& t2 = sf.t[2]; + const Reg64& t3 = sf.t[3]; + const Reg64& t4 = sf.t[4]; + const Reg64& t5 = sf.t[5]; + const Reg64& t6 = sf.t[6]; + const Reg64& t7 = sf.t[7]; + const Reg64& t8 = sf.t[8]; + const Reg64& t9 = sf.t[9]; + + movq(xm0, pz); // save pz + mov(t7, (uint64_t)p); + mov(t9, ptr [px]); + mul3x1_sqr1(px, t9, t3, t2, t1, t0); + mov(t0, rdx); + montgomery3_sub(pp, t0, t9, t2, t1, px, t3, t7, t4, t5, t6, t8, pz, true); + + mov(t3, ptr [px + 8]); + mul3x1_sqr2(px, t3, t6, t5, t4); + add_rr(Pack(t1, t0, t9, t2), Pack(rdx, rax, t5, t4)); + if (isFullBit_) setc(pz.cvt8()); + montgomery3_sub(pp, t1, t3, t9, t2, px, t0, t7, t4, t5, t6, t8, pz, false); + + mov(t0, ptr [px + 16]); + mul3x1_sqr3(t0, t5, t4); + add_rr(Pack(t2, t1, t3, t9), Pack(rdx, rax, t5, t4)); + if (isFullBit_) setc(pz.cvt8()); + montgomery3_sub(pp, t2, t0, t3, t9, px, t1, t7, t4, t5, t6, t8, pz, false); + + // [t9:t2:t0:t3] + mov(t4, t3); + mov(t5, t0); + mov(t6, t2); + sub_rm(Pack(t2, t0, t3), t7); + if (isFullBit_) sbb(t9, 0); + cmovc(t3, t4); + cmovc(t0, t5); + cmovc(t2, t6); + movq(pz, xm0); + store_mr(pz, Pack(t2, t0, t3)); + } + static inline void debug_put_inner(const uint64_t *ptr, int n) + { + printf("debug "); + for (int i = 0; i < n; i++) { + printf("%016llx", (long long)ptr[n - 1 - i]); + } + printf("\n"); + } +#ifdef _MSC_VER + void debug_put(const RegExp& m, int n) + { + assert(n <= 8); + static uint64_t regBuf[7]; + + push(rax); + mov(rax, (size_t)regBuf); + mov(ptr [rax + 8 * 0], rcx); + mov(ptr [rax + 8 * 1], rdx); + mov(ptr [rax + 8 * 2], r8); + mov(ptr [rax + 8 * 3], r9); + mov(ptr [rax + 8 * 4], r10); + mov(ptr [rax + 8 * 5], r11); + mov(rcx, ptr [rsp + 8]); // org rax + mov(ptr [rax + 8 * 6], rcx); // save + mov(rcx, ptr [rax + 8 * 0]); // org rcx + pop(rax); + + lea(rcx, ptr [m]); + mov(rdx, n); + mov(rax, (size_t)debug_put_inner); + sub(rsp, 32); + call(rax); + add(rsp, 32); + + push(rax); + mov(rax, (size_t)regBuf); + mov(rcx, ptr [rax + 8 * 0]); + mov(rdx, ptr [rax + 8 * 1]); + mov(r8, ptr [rax + 8 * 2]); + mov(r9, ptr [rax + 8 * 3]); + mov(r10, ptr [rax + 8 * 4]); + mov(r11, ptr [rax + 8 * 5]); + mov(rax, ptr [rax + 8 * 6]); + add(rsp, 8); + } +#endif + /* + z >>= c + @note shrd(r/m, r, imm) + */ + void shr_mp(const MixPack& z, uint8_t c, const Reg64& t) + { + const size_t n = z.size(); + for (size_t i = 0; i < n - 1; i++) { + const Reg64 *p; + if (z.isReg(i + 1)) { + p = &z.getReg(i + 1); + } else { + mov(t, ptr [z.getMem(i + 1)]); + p = &t; + } + if (z.isReg(i)) { + shrd(z.getReg(i), *p, c); + } else { + shrd(qword [z.getMem(i)], *p, c); + } + } + if (z.isReg(n - 1)) { + shr(z.getReg(n - 1), c); + } else { + shr(qword [z.getMem(n - 1)], c); + } + } + /* + z *= 2 + */ + void twice_mp(const MixPack& z, const Reg64& t) + { + g_add(z[0], z[0], t); + for (size_t i = 1, n = z.size(); i < n; i++) { + g_adc(z[i], z[i], t); + } + } + /* + z += x + */ + void add_mp(const MixPack& z, const MixPack& x, const Reg64& t) + { + assert(z.size() == x.size()); + g_add(z[0], x[0], t); + for (size_t i = 1, n = z.size(); i < n; i++) { + g_adc(z[i], x[i], t); + } + } + void add_m_m(const RegExp& mz, const RegExp& mx, const Reg64& t, int n) + { + for (int i = 0; i < n; i++) { + mov(t, ptr [mx + i * 8]); + if (i == 0) { + add(ptr [mz + i * 8], t); + } else { + adc(ptr [mz + i * 8], t); + } + } + } + /* + mz[] = mx[] - y + */ + void sub_m_mp_m(const RegExp& mz, const RegExp& mx, const MixPack& y, const Reg64& t) + { + for (size_t i = 0; i < y.size(); i++) { + mov(t, ptr [mx + i * 8]); + if (i == 0) { + if (y.isReg(i)) { + sub(t, y.getReg(i)); + } else { + sub(t, ptr [y.getMem(i)]); + } + } else { + if (y.isReg(i)) { + sbb(t, y.getReg(i)); + } else { + sbb(t, ptr [y.getMem(i)]); + } + } + mov(ptr [mz + i * 8], t); + } + } + /* + z -= x + */ + void sub_mp(const MixPack& z, const MixPack& x, const Reg64& t) + { + assert(z.size() == x.size()); + g_sub(z[0], x[0], t); + for (size_t i = 1, n = z.size(); i < n; i++) { + g_sbb(z[i], x[i], t); + } + } + /* + z -= px[] + */ + void sub_mp_m(const MixPack& z, const RegExp& px, const Reg64& t) + { + if (z.isReg(0)) { + sub(z.getReg(0), ptr [px]); + } else { + mov(t, ptr [px]); + sub(ptr [z.getMem(0)], t); + } + for (size_t i = 1, n = z.size(); i < n; i++) { + if (z.isReg(i)) { + sbb(z.getReg(i), ptr [px + i * 8]); + } else { + mov(t, ptr [px + i * 8]); + sbb(ptr [z.getMem(i)], t); + } + } + } + void store_mp(const RegExp& m, const MixPack& z, const Reg64& t) + { + for (size_t i = 0, n = z.size(); i < n; i++) { + if (z.isReg(i)) { + mov(ptr [m + i * 8], z.getReg(i)); + } else { + mov(t, ptr [z.getMem(i)]); + mov(ptr [m + i * 8], t); + } + } + } + void load_mp(const MixPack& z, const RegExp& m, const Reg64& t) + { + for (size_t i = 0, n = z.size(); i < n; i++) { + if (z.isReg(i)) { + mov(z.getReg(i), ptr [m + i * 8]); + } else { + mov(t, ptr [m + i * 8]); + mov(ptr [z.getMem(i)], t); + } + } + } + void set_mp(const MixPack& z, const Reg64& t) + { + for (size_t i = 0, n = z.size(); i < n; i++) { + MCL_FP_GEN_OP_MR(mov, z[i], t) + } + } + void mov_mp(const MixPack& z, const MixPack& x, const Reg64& t) + { + for (size_t i = 0, n = z.size(); i < n; i++) { + const MemReg zi = z[i], xi = x[i]; + if (z.isReg(i)) { + MCL_FP_GEN_OP_RM(mov, zi.getReg(), xi) + } else { + if (x.isReg(i)) { + mov(ptr [z.getMem(i)], x.getReg(i)); + } else { + mov(t, ptr [x.getMem(i)]); + mov(ptr [z.getMem(i)], t); + } + } + } + } +#ifdef _MSC_VER + void debug_put_mp(const MixPack& mp, int n, const Reg64& t) + { + if (n >= 10) exit(1); + static uint64_t buf[10]; + movq(xm0, rax); + mov(rax, (size_t)buf); + store_mp(rax, mp, t); + movq(rax, xm0); + push(rax); + mov(rax, (size_t)buf); + debug_put(rax, n); + pop(rax); + } +#endif + + std::string mkLabel(const char *label, int n) const + { + return std::string(label) + Xbyak::Label::toStr(n); + } + /* + int k = preInvC(pr, px) + */ + void gen_preInv() + { + assert(pn_ >= 2); + const int freeRegNum = 13; + if (pn_ > 9) { + throw cybozu::Exception("mcl:FpGenerator:gen_preInv:large pn_") << pn_; + } + StackFrame sf(this, 2, 10 | UseRDX | UseRCX, (std::max(0, pn_ * 5 - freeRegNum) + 1 + (isFullBit_ ? 1 : 0)) * 8); + const Reg64& pr = sf.p[0]; + const Reg64& px = sf.p[1]; + const Reg64& t = rcx; + /* + k = rax, t = rcx : temporary + use rdx, pr, px in main loop, so we can use 13 registers + v = t[0, pn_) : all registers + */ + size_t rspPos = 0; + + assert(sf.t.size() >= (size_t)pn_); + Pack remain = sf.t; + + const MixPack rr(remain, rspPos, pn_); + remain.append(rdx); + const MixPack ss(remain, rspPos, pn_); + remain.append(px); + const int rSize = (int)remain.size(); + MixPack vv(remain, rspPos, pn_, rSize > 0 ? rSize / 2 : -1); + remain.append(pr); + MixPack uu(remain, rspPos, pn_); + + const RegExp keep_pr = rsp + rspPos; + rspPos += 8; + const RegExp rTop = rsp + rspPos; // used if isFullBit_ + + inLocalLabel(); + mov(ptr [keep_pr], pr); + mov(rax, px); + // px is free frome here + load_mp(vv, rax, t); // v = x + mov(rax, (size_t)p_); + load_mp(uu, rax, t); // u = p_ + // k = 0 + xor_(rax, rax); + // rTop = 0 + if (isFullBit_) { + mov(ptr [rTop], rax); + } + // r = 0; + set_mp(rr, rax); + // s = 1 + set_mp(ss, rax); + if (ss.isReg(0)) { + mov(ss.getReg(0), 1); + } else { + mov(qword [ss.getMem(0)], 1); + } +#if 0 + L(".lp"); + or_mp(vv, t); + jz(".exit", T_NEAR); + + g_test(uu[0], 1); + jz(".u_even", T_NEAR); + g_test(vv[0], 1); + jz(".v_even", T_NEAR); + for (int i = pn_ - 1; i >= 0; i--) { + g_cmp(vv[i], uu[i], t); + jc(".v_lt_u", T_NEAR); + if (i > 0) jnz(".v_ge_u", T_NEAR); + } + + L(".v_ge_u"); + sub_mp(vv, uu, t); + add_mp(ss, rr, t); + L(".v_even"); + shr_mp(vv, 1, t); + twice_mp(rr, t); + if (isFullBit_) { + sbb(t, t); + mov(ptr [rTop], t); + } + inc(rax); + jmp(".lp", T_NEAR); + L(".v_lt_u"); + sub_mp(uu, vv, t); + add_mp(rr, ss, t); + if (isFullBit_) { + sbb(t, t); + mov(ptr [rTop], t); + } + L(".u_even"); + shr_mp(uu, 1, t); + twice_mp(ss, t); + inc(rax); + jmp(".lp", T_NEAR); +#else + for (int cn = pn_; cn > 0; cn--) { + const std::string _lp = mkLabel(".lp", cn); + const std::string _u_v_odd = mkLabel(".u_v_odd", cn); + const std::string _u_even = mkLabel(".u_even", cn); + const std::string _v_even = mkLabel(".v_even", cn); + const std::string _v_ge_u = mkLabel(".v_ge_u", cn); + const std::string _v_lt_u = mkLabel(".v_lt_u", cn); + L(_lp); + or_mp(vv, t); + jz(".exit", T_NEAR); + + g_test(uu[0], 1); + jz(_u_even, T_NEAR); + g_test(vv[0], 1); + jz(_v_even, T_NEAR); + L(_u_v_odd); + if (cn > 1) { + isBothZero(vv[cn - 1], uu[cn - 1], t); + jz(mkLabel(".u_v_odd", cn - 1), T_NEAR); + } + for (int i = cn - 1; i >= 0; i--) { + g_cmp(vv[i], uu[i], t); + jc(_v_lt_u, T_NEAR); + if (i > 0) jnz(_v_ge_u, T_NEAR); + } + + L(_v_ge_u); + sub_mp(vv, uu, t); + add_mp(ss, rr, t); + L(_v_even); + shr_mp(vv, 1, t); + twice_mp(rr, t); + if (isFullBit_) { + sbb(t, t); + mov(ptr [rTop], t); + } + inc(rax); + jmp(_lp, T_NEAR); + L(_v_lt_u); + sub_mp(uu, vv, t); + add_mp(rr, ss, t); + if (isFullBit_) { + sbb(t, t); + mov(ptr [rTop], t); + } + L(_u_even); + shr_mp(uu, 1, t); + twice_mp(ss, t); + inc(rax); + jmp(_lp, T_NEAR); + + if (cn > 0) { + vv.removeLast(); + uu.removeLast(); + } + } +#endif + L(".exit"); + assert(ss.isReg(0) && ss.isReg(1)); + const Reg64& t2 = ss.getReg(0); + const Reg64& t3 = ss.getReg(1); + + mov(t2, (size_t)p_); + if (isFullBit_) { + mov(t, ptr [rTop]); + test(t, t); + jz("@f"); + sub_mp_m(rr, t2, t); + L("@@"); + } + mov(t3, ptr [keep_pr]); + // pr[] = p[] - rr + sub_m_mp_m(t3, t2, rr, t); + jnc("@f"); + // pr[] += p[] + add_m_m(t3, t2, t, pn_); + L("@@"); + outLocalLabel(); + } + void mov32c(const Reg64& r, uint64_t c) + { + if (c & 0xffffffff00000000ULL) { + mov(r, c); + } else { + mov(Xbyak::Reg32(r.getIdx()), (uint32_t)c); + } + } +private: + FpGenerator(const FpGenerator&); + void operator=(const FpGenerator&); + void make_op_rm(void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), const Reg64& op1, const MemReg& op2) + { + if (op2.isReg()) { + (this->*op)(op1, op2.getReg()); + } else { + (this->*op)(op1, qword [op2.getMem()]); + } + } + void make_op_mr(void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), const MemReg& op1, const Reg64& op2) + { + if (op1.isReg()) { + (this->*op)(op1.getReg(), op2); + } else { + (this->*op)(qword [op1.getMem()], op2); + } + } + void make_op(void (Xbyak::CodeGenerator::*op)(const Xbyak::Operand&, const Xbyak::Operand&), const MemReg& op1, const MemReg& op2, const Reg64& t) + { + if (op1.isReg()) { + make_op_rm(op, op1.getReg(), op2); + } else if (op2.isReg()) { + (this->*op)(ptr [op1.getMem()], op2.getReg()); + } else { + mov(t, ptr [op2.getMem()]); + (this->*op)(ptr [op1.getMem()], t); + } + } + void g_add(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::add, op1, op2, t); } + void g_adc(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::adc, op1, op2, t); } + void g_sub(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::sub, op1, op2, t); } + void g_sbb(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::sbb, op1, op2, t); } + void g_cmp(const MemReg& op1, const MemReg& op2, const Reg64& t) { make_op(&Xbyak::CodeGenerator::cmp, op1, op2, t); } + void g_or(const Reg64& r, const MemReg& op) { make_op_rm(&Xbyak::CodeGenerator::or_, r, op); } + void g_test(const MemReg& op1, const MemReg& op2, const Reg64& t) + { + const MemReg *pop1 = &op1; + const MemReg *pop2 = &op2; + if (!pop1->isReg()) { + std::swap(pop1, pop2); + } + // (M, M), (R, M), (R, R) + if (pop1->isReg()) { + MCL_FP_GEN_OP_MR(test, (*pop2), pop1->getReg()) + } else { + mov(t, ptr [pop1->getMem()]); + test(ptr [pop2->getMem()], t); + } + } + void g_mov(const MemReg& op, const Reg64& r) + { + make_op_mr(&Xbyak::CodeGenerator::mov, op, r); + } + void g_mov(const Reg64& r, const MemReg& op) + { + make_op_rm(&Xbyak::CodeGenerator::mov, r, op); + } + void g_add(const Reg64& r, const MemReg& mr) { MCL_FP_GEN_OP_RM(add, r, mr) } + void g_adc(const Reg64& r, const MemReg& mr) { MCL_FP_GEN_OP_RM(adc, r, mr) } + void isBothZero(const MemReg& op1, const MemReg& op2, const Reg64& t) + { + g_mov(t, op1); + g_or(t, op2); + } + void g_test(const MemReg& op, int imm) + { + MCL_FP_GEN_OP_MR(test, op, imm) + } + /* + z[] = x[] + */ + void mov_rr(const Pack& z, const Pack& x) + { + assert(z.size() == x.size()); + for (int i = 0, n = (int)x.size(); i < n; i++) { + mov(z[i], x[i]); + } + } + /* + m[] = x[] + */ + void store_mr(const RegExp& m, const Pack& x) + { + for (int i = 0, n = (int)x.size(); i < n; i++) { + mov(ptr [m + 8 * i], x[i]); + } + } + /* + x[] = m[] + */ + void load_rm(const Pack& z, const RegExp& m) + { + for (int i = 0, n = (int)z.size(); i < n; i++) { + mov(z[i], ptr [m + 8 * i]); + } + } + /* + z[] += x[] + */ + void add_rr(const Pack& z, const Pack& x) + { + add(z[0], x[0]); + assert(z.size() == x.size()); + for (size_t i = 1, n = z.size(); i < n; i++) { + adc(z[i], x[i]); + } + } + /* + z[] -= x[] + */ + void sub_rr(const Pack& z, const Pack& x) + { + sub(z[0], x[0]); + assert(z.size() == x.size()); + for (size_t i = 1, n = z.size(); i < n; i++) { + sbb(z[i], x[i]); + } + } + /* + z[] += m[] + */ + void add_rm(const Pack& z, const RegExp& m) + { + add(z[0], ptr [m + 8 * 0]); + for (int i = 1, n = (int)z.size(); i < n; i++) { + adc(z[i], ptr [m + 8 * i]); + } + } + /* + z[] -= m[] + */ + void sub_rm(const Pack& z, const RegExp& m) + { + sub(z[0], ptr [m + 8 * 0]); + for (int i = 1, n = (int)z.size(); i < n; i++) { + sbb(z[i], ptr [m + 8 * i]); + } + } + /* + t = all or z[i] + ZF = z is zero + */ + void or_mp(const MixPack& z, const Reg64& t) + { + const size_t n = z.size(); + if (n == 1) { + if (z.isReg(0)) { + test(z.getReg(0), z.getReg(0)); + } else { + mov(t, ptr [z.getMem(0)]); + test(t, t); + } + } else { + g_mov(t, z[0]); + for (size_t i = 1; i < n; i++) { + g_or(t, z[i]); + } + } + } + /* + [rdx:x:t1:t0] <- py[2:1:0] * x + destroy x, t + */ + void mul3x1(const RegExp& py, const Reg64& x, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t) + { + if (useMulx_) { + // mulx(H, L, x) = [H:L] = x * rdx + /* + rdx:x + t:t1 + rax:t0 + */ + mov(rdx, x); + mulx(rax, t0, ptr [py]); // [rax:t0] = py[0] * x + mulx(t, t1, ptr [py + 8]); // [t:t1] = py[1] * x + add(t1, rax); + mulx(rdx, x, ptr [py + 8 * 2]); + adc(x, t); + adc(rdx, 0); + } else { + mov(rax, ptr [py]); + mul(x); + mov(t0, rax); + mov(t1, rdx); + mov(rax, ptr [py + 8]); + mul(x); + mov(t, rax); + mov(t2, rdx); + mov(rax, ptr [py + 8 * 2]); + mul(x); + /* + rdx:rax + t2:t + t1:t0 + */ + add(t1, t); + adc(rax, t2); + adc(rdx, 0); + mov(x, rax); + } + } + /* + [x2:x1:x0] * x0 + */ + void mul3x1_sqr1(const RegExp& px, const Reg64& x0, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t) + { + mov(rax, x0); + mul(x0); + mov(t0, rax); + mov(t1, rdx); + mov(rax, ptr [px + 8]); + mul(x0); + mov(ptr [rsp + 0 * 8], rax); // (x0 * x1)_L + mov(ptr [rsp + 1 * 8], rdx); // (x0 * x1)_H + mov(t, rax); + mov(t2, rdx); + mov(rax, ptr [px + 8 * 2]); + mul(x0); + mov(ptr [rsp + 2 * 8], rax); // (x0 * x2)_L + mov(ptr [rsp + 3 * 8], rdx); // (x0 * x2)_H + /* + rdx:rax + t2:t + t1:t0 + */ + add(t1, t); + adc(t2, rax); + adc(rdx, 0); + } + /* + [x2:x1:x0] * x1 + */ + void mul3x1_sqr2(const RegExp& px, const Reg64& x1, const Reg64& t2, const Reg64& t1, const Reg64& t0) + { + mov(t0, ptr [rsp + 0 * 8]);// (x0 * x1)_L + mov(rax, x1); + mul(x1); + mov(t1, rax); + mov(t2, rdx); + mov(rax, ptr [px + 8 * 2]); + mul(x1); + mov(ptr [rsp + 4 * 8], rax); // (x1 * x2)_L + mov(ptr [rsp + 5 * 8], rdx); // (x1 * x2)_H + /* + rdx:rax + t2:t1 + t:t0 + */ + add(t1, ptr [rsp + 1 * 8]); // (x0 * x1)_H + adc(rax, t2); + adc(rdx, 0); + } + /* + [rdx:rax:t1:t0] = [x2:x1:x0] * x2 + */ + void mul3x1_sqr3(const Reg64& x2, const Reg64& t1, const Reg64& t0) + { + mov(rax, x2); + mul(x2); + /* + rdx:rax + t2:t + t1:t0 + */ + mov(t0, ptr [rsp + 2 * 8]); // (x0 * x2)_L + mov(t1, ptr [rsp + 3 * 8]); // (x0 * x2)_H + add(t1, ptr [rsp + 4 * 8]); // (x1 * x2)_L + adc(rax, ptr [rsp + 5 * 8]); // (x1 * x2)_H + adc(rdx, 0); + } + + /* + c = [c3:y:c1:c0] = c + x[2..0] * y + q = uint64_t(c0 * pp) + c = (c + q * p) >> 64 + input [c3:c2:c1:c0], px, y, p + output [c0:c3:c2:c1] ; c0 == 0 unless isFullBit_ + + @note use rax, rdx, destroy y + */ + void montgomery3_sub(uint64_t pp, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0, + const Reg64& /*px*/, const Reg64& y, const Reg64& p, + const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst) + { + // input [c3:y:c1:0] + // [t4:c3:y:c1:c0] + // t4 = 0 or 1 if isFullBit_, = 0 otherwise + mov(rax, pp); + mul(c0); // q = rax + mov(c2, rax); + mul3x1(p, c2, t2, t1, t0, t3); + // [rdx:c2:t1:t0] = p * q + add(c0, t0); // always c0 is zero + adc(c1, t1); + adc(c2, y); + adc(c3, rdx); + if (isFullBit_) { + if (isFirst) { + setc(c0.cvt8()); + } else { + adc(c0.cvt8(), t4.cvt8()); + } + } + } + /* + c = [c3:c2:c1:c0] + c += x[2..0] * y + q = uint64_t(c0 * pp) + c = (c + q * p) >> 64 + input [c3:c2:c1:c0], px, y, p + output [c0:c3:c2:c1] ; c0 == 0 unless isFullBit_ + + @note use rax, rdx, destroy y + */ + void montgomery3_1(uint64_t pp, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0, + const Reg64& px, const Reg64& y, const Reg64& p, + const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst) + { + if (isFirst) { + mul3x1(px, y, c2, c1, c0, c3); + mov(c3, rdx); + // [c3:y:c1:c0] = px[2..0] * y + } else { + mul3x1(px, y, t2, t1, t0, t3); + // [rdx:y:t1:t0] = px[2..0] * y + add_rr(Pack(c3, y, c1, c0), Pack(rdx, c2, t1, t0)); + if (isFullBit_) setc(t4.cvt8()); + } + montgomery3_sub(pp, c3, c2, c1, c0, px, y, p, t0, t1, t2, t3, t4, isFirst); + } + /* + pc[0..n] += x[0..n-1] * y ; pc[] = 0 if isFirst + pc[n + 1] is temporary used if isFullBit_ + q = uint64_t(pc[0] * pp) + pc[] = (pc[] + q * p) >> 64 + input : pc[], px[], y, p[], pw1[], pw2[] + output : pc[0..n] ; if isFullBit_ + pc[0..n-1] ; if !isFullBit_ + destroy y + use + pw1[0] if useMulx_ + pw1[0..n-2] otherwise + pw2[0..n-1] + */ + void montgomeryN_1(uint64_t pp, int n, const RegExp& pc, const RegExp& px, const Reg64& y, const Reg64& p, const Reg64& t, const MixPack& pw1, const RegExp& pw2, bool isFirst) + { + // pc[] += x[] * y + if (isFirst) { + gen_raw_mulI(pc, px, y, pw1, t, n); + mov(ptr [pc + n * 8], rdx); + } else { + gen_raw_mulI(pw2, px, y, pw1, t, n); + mov(t, ptr [pw2 + 0 * 8]); + add(ptr [pc + 0 * 8], t); + for (int i = 1; i < n; i++) { + mov(t, ptr [pw2 + i * 8]); + adc(ptr [pc + i * 8], t); + } + adc(ptr [pc + n * 8], rdx); + if (isFullBit_) { + mov(t, 0); + adc(t, 0); + mov(qword [pc + (n + 1) * 8], t); + } + } + mov(rax, pp); + mul(qword [pc]); + mov(y, rax); // y = q + gen_raw_mulI(pw2, p, y, pw1, t, n); + // c[] = (c[] + pw2[]) >> 64 + mov(t, ptr [pw2 + 0 * 8]); + add(t, ptr [pc + 0 * 8]); + for (int i = 1; i < n; i++) { + mov(t, ptr [pw2 + i * 8]); + adc(t, ptr [pc + i * 8]); + mov(ptr [pc + (i - 1) * 8], t); + } + adc(rdx, ptr [pc + n * 8]); + mov(ptr [pc + (n - 1) * 8], rdx); + if (isFullBit_) { + if (isFirst) { + mov(t, 0); + } else { + mov(t, ptr [pc + (n + 1) * 8]); + } + adc(t, 0); + mov(qword [pc + n * 8], t); + } else { + xor_(eax, eax); + mov(ptr [pc + n * 8], rax); + } + } + /* + [rdx:x:t2:t1:t0] <- py[3:2:1:0] * x + destroy x, t + */ + void mul4x1(const RegExp& py, const Reg64& x, const Reg64& t3, const Reg64& t2, const Reg64& t1, const Reg64& t0, const Reg64& t) + { + if (useMulx_) { + mov(rdx, x); + mulx(t1, t0, ptr [py + 8 * 0]); + mulx(t2, rax, ptr [py + 8 * 1]); + add(t1, rax); + mulx(x, rax, ptr [py + 8 * 2]); + adc(t2, rax); + mulx(rdx, rax, ptr [py + 8 * 3]); + adc(x, rax); + adc(rdx, 0); + } else { + mov(rax, ptr [py]); + mul(x); + mov(t0, rax); + mov(t1, rdx); + mov(rax, ptr [py + 8]); + mul(x); + mov(t, rax); + mov(t2, rdx); + mov(rax, ptr [py + 8 * 2]); + mul(x); + mov(t3, rax); + mov(rax, x); + mov(x, rdx); + mul(qword [py + 8 * 3]); + add(t1, t); + adc(t2, t3); + adc(x, rax); + adc(rdx, 0); + } + } + + /* + c = [c4:c3:c2:c1:c0] + c += x[3..0] * y + q = uint64_t(c0 * pp) + c = (c + q * p) >> 64 + input [c4:c3:c2:c1:c0], px, y, p + output [c0:c4:c3:c2:c1] + + @note use rax, rdx, destroy y + use xt if isFullBit_ + */ + void montgomery4_1(uint64_t pp, const Reg64& c4, const Reg64& c3, const Reg64& c2, const Reg64& c1, const Reg64& c0, + const Reg64& px, const Reg64& y, const Reg64& p, + const Reg64& t0, const Reg64& t1, const Reg64& t2, const Reg64& t3, const Reg64& t4, bool isFirst, const Xmm& xt) + { + if (isFirst) { + mul4x1(px, y, c3, c2, c1, c0, c4); + mov(c4, rdx); + // [c4:y:c2:c1:c0] = px[3..0] * y + } else { + mul4x1(px, y, t3, t2, t1, t0, t4); + // [rdx:y:t2:t1:t0] = px[3..0] * y + if (isFullBit_) { + movq(xt, px); + xor_(px, px); + } + add_rr(Pack(c4, y, c2, c1, c0), Pack(rdx, c3, t2, t1, t0)); + if (isFullBit_) { + adc(px, 0); + } + } + // [px:c4:y:c2:c1:c0] + // px = 0 or 1 if isFullBit_, = 0 otherwise + mov(rax, pp); + mul(c0); // q = rax + mov(c3, rax); + mul4x1(p, c3, t3, t2, t1, t0, t4); + add(c0, t0); // always c0 is zero + adc(c1, t1); + adc(c2, t2); + adc(c3, y); + adc(c4, rdx); + if (isFullBit_) { + if (isFirst) { + adc(c0, 0); + } else { + adc(c0, px); + movq(px, xt); + } + } + } +}; + +} // mcl + +#endif diff --git a/include/mcl/fp_util.hpp b/include/mcl/fp_util.hpp new file mode 100644 index 0000000..7419672 --- /dev/null +++ b/include/mcl/fp_util.hpp @@ -0,0 +1,294 @@ +#pragma once +#include +#include +#include +#include +/** + @file + @brief utility of Fp + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ + +namespace mcl { namespace fp { + +#if defined(CYBOZU_OS_BIT) && (CYBOZU_OS_BIT == 32) + typedef uint32_t BlockType; +#else + typedef uint64_t BlockType; +#endif + +template +void setBlockBit(S *buf, size_t bitLen, bool b) +{ + const size_t unitSize = sizeof(S) * 8; + const size_t q = bitLen / unitSize; + const size_t r = bitLen % unitSize; + if (b) { + buf[q] |= S(1) << r; + } else { + buf[q] &= ~(S(1) << r); + } +} +template +bool getBlockBit(const S *buf, size_t bitLen) +{ + const size_t unitSize = sizeof(S) * 8; + const size_t q = bitLen / unitSize; + const size_t r = bitLen % unitSize; + return (buf[q] & (S(1) << r)) != 0; +} +/* + convert x[0..n) to hex string + start "0x" if withPrefix +*/ +template +void toStr16(std::string& str, const T *x, size_t n, bool withPrefix = false) +{ + size_t fullN = 0; + if (n > 1) { + size_t pos = n - 1; + while (pos > 0) { + if (x[pos]) break; + pos--; + } + if (pos > 0) fullN = pos; + } + const T v = n == 0 ? 0 : x[fullN]; + const size_t topLen = cybozu::getHexLength(v); + const size_t startPos = withPrefix ? 2 : 0; + const size_t lenT = sizeof(T) * 2; + str.resize(startPos + fullN * lenT + topLen); + if (withPrefix) { + str[0] = '0'; + str[1] = 'x'; + } + cybozu::itohex(&str[startPos], topLen, v, false); + for (size_t i = 0; i < fullN; i++) { + cybozu::itohex(&str[startPos + topLen + i * lenT], lenT, x[fullN - 1 - i], false); + } +} + +/* + convert x[0..n) to bin string + start "0b" if withPrefix +*/ +template +void toStr2(std::string& str, const T *x, size_t n, bool withPrefix) +{ + size_t fullN = 0; + if (n > 1) { + size_t pos = n - 1; + while (pos > 0) { + if (x[pos]) break; + pos--; + } + if (pos > 0) fullN = pos; + } + const T v = n == 0 ? 0 : x[fullN]; + const size_t topLen = cybozu::getBinLength(v); + const size_t startPos = withPrefix ? 2 : 0; + const size_t lenT = sizeof(T) * 8; + str.resize(startPos + fullN * lenT + topLen); + if (withPrefix) { + str[0] = '0'; + str[1] = 'b'; + } + cybozu::itobin(&str[startPos], topLen, v); + for (size_t i = 0; i < fullN; i++) { + cybozu::itobin(&str[startPos + topLen + i * lenT], lenT, x[fullN - 1 - i]); + } +} + +/* + convert hex string to x[0..xn) + hex string = [0-9a-fA-F]+ +*/ +template +void fromStr16(T *x, size_t xn, const char *str, size_t strLen) +{ + if (strLen == 0) throw cybozu::Exception("fp:fromStr16:strLen is zero"); + const size_t unitLen = sizeof(T) * 2; + const size_t q = strLen / unitLen; + const size_t r = strLen % unitLen; + const size_t requireSize = q + (r ? 1 : 0); + if (xn < requireSize) throw cybozu::Exception("fp:fromStr16:short size") << xn << requireSize; + for (size_t i = 0; i < q; i++) { + bool b; + x[i] = cybozu::hextoi(&b, &str[r + (q - 1 - i) * unitLen], unitLen); + if (!b) throw cybozu::Exception("fp:fromStr16:bad char") << cybozu::exception::makeString(str, strLen); + } + if (r) { + bool b; + x[q] = cybozu::hextoi(&b, str, r); + if (!b) throw cybozu::Exception("fp:fromStr16:bad char") << cybozu::exception::makeString(str, strLen); + } + for (size_t i = requireSize; i < xn; i++) x[i] = 0; +} + +/* + @param base [inout] +*/ +inline const char *verifyStr(bool *isMinus, int *base, const std::string& str) +{ + const char *p = str.c_str(); + if (*p == '-') { + *isMinus = true; + p++; + } else { + *isMinus = false; + } + if (p[0] == '0') { + if (p[1] == 'x') { + if (*base != 0 && *base != 16) { + throw cybozu::Exception("fp:verifyStr:bad base") << *base << str; + } + *base = 16; + p += 2; + } else if (p[1] == 'b') { + if (*base != 0 && *base != 2) { + throw cybozu::Exception("fp:verifyStr:bad base") << *base << str; + } + *base = 2; + p += 2; + } + } + if (*base == 0) *base = 10; + if (*p == '\0') throw cybozu::Exception("fp:verifyStr:str is empty"); + return p; +} + +template +size_t getRoundNum(size_t x) +{ + const size_t size = sizeof(S) * 8; + return (x + size - 1) / size; +} + +/* + compare x[0, n) with y[0, n) +*/ +template +int compareArray(const S* x, const S* y, size_t n) +{ + for (size_t i = 0; i < n; i++) { + const S a = x[n - 1 - i]; + const S b = y[n - 1 - i]; + if (a > b) return 1; + if (a < b) return -1; + } + return 0; +} + +/* + get random value less than in[] + n = (bitLen + sizeof(S) * 8) / (sizeof(S) * 8) + input in[0..n) + output out[n..n) + 0 <= out < in +*/ +template +inline void getRandVal(S *out, RG& rg, const S *in, size_t bitLen) +{ + const size_t unitBitSize = sizeof(S) * 8; + const size_t n = getRoundNum(bitLen); + const size_t rem = bitLen & (unitBitSize - 1); + for (;;) { + rg.read(out, n); + if (rem > 0) out[n - 1] &= (S(1) << rem) - 1; + if (compareArray(out, in, n) < 0) return; + } +} + +/* + z[] = (x[] << shift) | y + @param z [out] z[0..n) + @param x [in] x[0..n) + @param n [in] length of x, z + @param shift [in] 0 <= shift < (sizeof(S) * 8) + @param y [in] + @return (x[] << shift)[n] +*/ +template +S shiftLeftOr(S* z, const S* x, size_t n, size_t shift, S y = 0) +{ + if (n == 0) { + throw cybozu::Exception("fp:shiftLeftOr:bad n"); + } + if (shift == 0) { + for (size_t i = n - 1; i > 0; i--) { + z[i] = x[i]; + } + z[0] = x[0] | y; + return 0; + } + const size_t unitSize = sizeof(S) * 8; + if (shift >= unitSize) { + throw cybozu::Exception("fp:shiftLeftOr:large shift") << shift; + } + const size_t rev = unitSize - shift; + S ret = x[n - 1] >> rev; + for (size_t i = n - 1; i > 0; i--) { + z[i] = (x[i] << shift) | (x[i - 1] >> rev); + } + z[0] = (x[0] << shift) | y; + return ret; +} +template +void shiftRight(S* z, const S* x, size_t n, size_t shift) +{ + if (n == 0) return; + if (shift == 0) { + for (size_t i = 0; i < n; i++) { + z[i] = x[i]; + } + return; + } + const size_t unitSize = sizeof(S) * 8; + if (shift >= unitSize) { + throw cybozu::Exception("fp:shiftRight:large shift") << shift; + } + const size_t rev = unitSize - shift; + S prev = x[0]; + for (size_t i = 0; i < n - 1; i++) { + S t = x[i + 1]; + z[i] = (prev >> shift) | (t << rev); + prev = t; + } + z[n - 1] = prev >> shift; +} + +template +size_t splitBitVec(Vec& v, const cybozu::BitVectorT& bv, size_t width) +{ + if (width > sizeof(typename Vec::value_type) * 8) { + throw cybozu::Exception("fp:splitBitVec:bad width") << width; + } + const size_t q = bv.size() / width; + const size_t r = bv.size() % width; + for (size_t i = 0; i < q; i++) { + v.push_back(bv.extract(i * width, width)); + } + if (r > 0) { + v.push_back(bv.extract(q * width, r)); + } + return r ? r : width; +} + +template +void concatBitVec(cybozu::BitVectorT& bv, const Vec& v, size_t width, size_t lastWidth) +{ + if (width > sizeof(typename Vec::value_type) * 8) { + throw cybozu::Exception("fp:splitBitVec:bad width") << width; + } + bv.clear(); + for (size_t i = 0; i < v.size() - 1; i++) { + bv.append(v[i], width); + } + bv.append(v[v.size() - 1], lastWidth); +} + +} // mcl::fp + +} // fp diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp new file mode 100644 index 0000000..c29c870 --- /dev/null +++ b/include/mcl/gmp_util.hpp @@ -0,0 +1,378 @@ +#pragma once +/** + @file + @brief util function for gmp + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include +#include +#include +#include +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4616) + #pragma warning(disable : 4800) + #pragma warning(disable : 4244) + #pragma warning(disable : 4127) + #pragma warning(disable : 4512) + #pragma warning(disable : 4146) +#endif +#include +#include +#ifdef _MSC_VER + #pragma warning(pop) +#endif +#ifdef _MSC_VER +#if _MSC_VER == 1900 +#ifdef _DEBUG +#pragma comment(lib, "14/mpird.lib") +#pragma comment(lib, "14/mpirxxd.lib") +#else +#pragma comment(lib, "14/mpir.lib") +#pragma comment(lib, "14/mpirxx.lib") +#endif +#elif _MSC_VER == 1800 +#ifdef _DEBUG +#pragma comment(lib, "12/mpird.lib") +#pragma comment(lib, "12/mpirxxd.lib") +#else +#pragma comment(lib, "12/mpir.lib") +#pragma comment(lib, "12/mpirxx.lib") +#endif +#else +#ifdef _DEBUG +#pragma comment(lib, "mpird.lib") +#pragma comment(lib, "mpirxxd.lib") +#else +#pragma comment(lib, "mpir.lib") +#pragma comment(lib, "mpirxx.lib") +#endif +#endif +#endif +#include + +namespace mcl { + +struct Gmp { + typedef mpz_class ImplType; +#if CYBOZU_OS_BIT == 64 + typedef uint64_t BlockType; +#else + typedef uint32_t BlockType; +#endif + // z = [buf[n-1]:..:buf[1]:buf[0]] + // eg. buf[] = {0x12345678, 0xaabbccdd}; => z = 0xaabbccdd12345678; + template + static void setRaw(mpz_class& z, const T *buf, size_t n) + { + mpz_import(z.get_mpz_t(), n, -1, sizeof(*buf), 0, 0, buf); + } + /* + return positive written size + return 0 if failure + */ + template + static size_t getRaw(T *buf, size_t maxSize, const mpz_class& x) + { + const size_t totalSize = sizeof(T) * maxSize; + if (getBitLen(x) > totalSize * 8) return 0; + memset(buf, 0, sizeof(*buf) * maxSize); + size_t size; + mpz_export(buf, &size, -1, sizeof(T), 0, 0, x.get_mpz_t()); + // if x == 0, then size = 0 for gmp, size = 1 for mpir + return size == 0 ? 1 : size; + } + static inline void set(mpz_class& z, uint64_t x) + { + setRaw(z, &x, 1); + } + static inline bool fromStr(mpz_class& z, const std::string& str, int base = 0) + { + return z.set_str(str, base) == 0; + } + static inline void toStr(std::string& str, const mpz_class& z, int base = 10) + { + str = z.get_str(base); + } + static inline void add(mpz_class& z, const mpz_class& x, const mpz_class& y) + { + mpz_add(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); + } + static inline void add(mpz_class& z, const mpz_class& x, unsigned int y) + { + mpz_add_ui(z.get_mpz_t(), x.get_mpz_t(), y); + } + static inline void sub(mpz_class& z, const mpz_class& x, const mpz_class& y) + { + mpz_sub(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); + } + static inline void sub(mpz_class& z, const mpz_class& x, unsigned int y) + { + mpz_sub_ui(z.get_mpz_t(), x.get_mpz_t(), y); + } + static inline void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) + { + mpz_mul(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); + } + static inline void square(mpz_class& z, const mpz_class& x) + { + mpz_mul(z.get_mpz_t(), x.get_mpz_t(), x.get_mpz_t()); + } + static inline void mul(mpz_class& z, const mpz_class& x, unsigned int y) + { + mpz_mul_ui(z.get_mpz_t(), x.get_mpz_t(), y); + } + static inline void divmod(mpz_class& q, mpz_class& r, const mpz_class& x, const mpz_class& y) + { + mpz_divmod(q.get_mpz_t(), r.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); + } + static inline void div(mpz_class& q, const mpz_class& x, const mpz_class& y) + { + mpz_div(q.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); + } + static inline void div(mpz_class& q, const mpz_class& x, unsigned int y) + { + mpz_div_ui(q.get_mpz_t(), x.get_mpz_t(), y); + } + static inline void mod(mpz_class& r, const mpz_class& x, const mpz_class& m) + { + mpz_mod(r.get_mpz_t(), x.get_mpz_t(), m.get_mpz_t()); + } + static inline void mod(mpz_class& r, const mpz_class& x, unsigned int m) + { + mpz_mod_ui(r.get_mpz_t(), x.get_mpz_t(), m); + } + static inline void clear(mpz_class& z) + { + mpz_set_ui(z.get_mpz_t(), 0); + } + static inline bool isZero(const mpz_class& z) + { + return mpz_sgn(z.get_mpz_t()) == 0; + } + static inline bool isNegative(const mpz_class& z) + { + return mpz_sgn(z.get_mpz_t()) < 0; + } + static inline void neg(mpz_class& z, const mpz_class& x) + { + mpz_neg(z.get_mpz_t(), x.get_mpz_t()); + } + static inline int compare(const mpz_class& x, const mpz_class & y) + { + return mpz_cmp(x.get_mpz_t(), y.get_mpz_t()); + } + static inline int compare(const mpz_class& x, int y) + { + return mpz_cmp_si(x.get_mpz_t(), y); + } + template + static inline void addMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m) + { + add(z, x, y); + if (compare(z, m) >= 0) { + sub(z, z, m); + } + } + template + static inline void subMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m) + { + sub(z, x, y); + if (!isNegative(z)) return; + add(z, z, m); + } + template + static inline void mulMod(mpz_class& z, const mpz_class& x, const T& y, const mpz_class& m) + { + mul(z, x, y); + mod(z, z, m); + } + static inline void squareMod(mpz_class& z, const mpz_class& x, const mpz_class& m) + { + square(z, x); + mod(z, z, m); + } + // z = x^y (y >= 0) + static inline void pow(mpz_class& z, const mpz_class& x, unsigned int y) + { + mpz_pow_ui(z.get_mpz_t(), x.get_mpz_t(), y); + } + // z = x^y mod m (y >=0) + static inline void powMod(mpz_class& z, const mpz_class& x, const mpz_class& y, const mpz_class& m) + { + mpz_powm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t(), m.get_mpz_t()); + } + // z = 1/x mod m + static inline void invMod(mpz_class& z, const mpz_class& x, const mpz_class& m) + { + mpz_invert(z.get_mpz_t(), x.get_mpz_t(), m.get_mpz_t()); + } + // z = lcm(x, y) + static inline void lcm(mpz_class& z, const mpz_class& x, const mpz_class& y) + { + mpz_lcm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); + } + static inline mpz_class lcm(const mpz_class& x, const mpz_class& y) + { + mpz_class z; + lcm(z, x, y); + return z; + } + // z = gcd(x, y) + static inline void gcd(mpz_class& z, const mpz_class& x, const mpz_class& y) + { + mpz_gcd(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t()); + } + static inline mpz_class gcd(const mpz_class& x, const mpz_class& y) + { + mpz_class z; + gcd(z, x, y); + return z; + } + /* + assume p : odd prime + return 1 if x^2 = a mod p for some x + return -1 if x^2 != a mod p for any x + */ + static inline int legendre(const mpz_class& a, const mpz_class& p) + { + return mpz_legendre(a.get_mpz_t(), p.get_mpz_t()); + } + static inline bool isPrime(const mpz_class& x) + { + return mpz_probab_prime_p(x.get_mpz_t(), 25) != 0; + } + static inline size_t getBitLen(const mpz_class& x) + { + return mpz_sizeinbase(x.get_mpz_t(), 2); + } + static inline BlockType getBlock(const mpz_class& x, size_t i) + { + return x.get_mpz_t()->_mp_d[i]; + } + static inline const BlockType *getBlock(const mpz_class& x) + { + return reinterpret_cast(x.get_mpz_t()->_mp_d); + } + static inline size_t getBlockSize(const mpz_class& x) + { + assert(x.get_mpz_t()->_mp_size >= 0); + return x.get_mpz_t()->_mp_size; + } + template + static inline void getRand(mpz_class& z, size_t bitLen, RG& rg) + { + assert(bitLen > 1); + const size_t rem = bitLen & 31; + const size_t n = (bitLen + 31) / 32; + std::vector buf(n); + rg.read(buf.data(), n); + uint32_t v = buf[n - 1]; + if (rem == 0) { + v |= 1U << 31; + } else { + v &= (1U << rem) - 1; + v |= 1U << (rem - 1); + } + buf[n - 1] = v; + Gmp::setRaw(z, &buf[0], n); + } + template + static void getRandPrime(mpz_class& z, size_t bitLen, RG& rg, bool setSecondBit = false, bool mustBe3mod4 = false) + { + assert(bitLen > 2); + do { + getRand(z, bitLen, rg); + if (setSecondBit) { + z |= mpz_class(1) << (bitLen - 2); + } + if (mustBe3mod4) { + z |= 3; + } + } while (!(isPrime(z))); + } +}; + +/* + Tonelli-Shanks +*/ +class SquareRoot { + bool isPrime; + mpz_class p; + mpz_class g; + int r; + mpz_class q; // p - 1 = 2^r q + mpz_class s; // s = g^q +public: + SquareRoot() : isPrime(false) {} + void set(const mpz_class& p) + { + if (p <= 2) throw cybozu::Exception("SquareRoot:bad p") << p; + isPrime = Gmp::isPrime(p); + if (!isPrime) return; // don't throw until get() is called + this->p = p; + // g is quadratic nonresidue + g = 2; + while (Gmp::legendre(g, p) > 0) { + g++; + } + // p - 1 = 2^r q, q is odd + r = 0; + q = p - 1; + while ((q & 1) == 0) { + r++; + q /= 2; + } + Gmp::powMod(s, g, q, p); + } + /* + solve x^2 = a mod p + */ + bool get(mpz_class& x, const mpz_class& a) const + { + if (!isPrime) throw cybozu::Exception("SquareRoot:get:not prime") << p; + if (Gmp::legendre(a, p) < 0) return false; + if (r == 1) { + Gmp::powMod(x, a, (p + 1) / 4, p); + return true; + } + mpz_class c = s, d; + int e = r; + Gmp::powMod(d, a, q, p); + Gmp::powMod(x, a, (q + 1) / 2, p); // destroy a if &x == &a + while (d != 1) { + int i = 1; + mpz_class dd = (d * d) % p; + while (dd != 1) { + dd = (dd * dd) % p; + i++; + } + mpz_class b = 1; + b <<= e - i - 1; + Gmp::powMod(b, c, b, p); + x = (x * b) % p; + c = (b * b) % p; + d = (d * c) % p; + e = i; + } + return true; + } +}; + +namespace ope { + +template<> +struct Optimized { + void init(const mpz_class&) {} + bool hasPowMod() const { return true; } + static void powMod(mpz_class& z, const mpz_class& x, const mpz_class& y, const mpz_class& m) + { + mpz_powm(z.get_mpz_t(), x.get_mpz_t(), y.get_mpz_t(), m.get_mpz_t()); + } +}; + +} // mcl::ope + +} // mcl diff --git a/include/mcl/mont_fp.hpp b/include/mcl/mont_fp.hpp new file mode 100644 index 0000000..1b539bf --- /dev/null +++ b/include/mcl/mont_fp.hpp @@ -0,0 +1,463 @@ +#pragma once +/** + @file + @brief Fp with montgomery(EXPERIMENTAL IMPLEMENTAION) + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause + + @note this class should be merged to FpT +*/ +#include +#include +#include +#include +#include + +namespace mcl { + +template +class MontFpT : public ope::addsub, + ope::mulable, + ope::invertible, + ope::hasNegative, + ope::hasIO > > > > > { + + static mpz_class pOrg_; + static mcl::SquareRoot sq_; + static MontFpT p_; + static MontFpT one_; + static MontFpT R_; // (1 << (N * 64)) % p + static MontFpT RR_; // (R * R) % p + static MontFpT invTbl_[N * 64 * 2]; + static size_t modBitLen_; +public: + static FpGenerator fg_; +private: + uint64_t v_[N]; + void fromRawGmp(const mpz_class& x) + { + if (Gmp::getRaw(v_, N, x) == 0) { + throw cybozu::Exception("MontFpT:fromRawGmp") << x; + } + } + template + void setMaskMod(std::vector& buf) + { + assert(buf.size() * sizeof(S) * 8 <= modBitLen_); + assert(!buf.empty()); + fp::maskBuffer(&buf[0], buf.size(), modBitLen_); + memcpy(v_, &buf[0], buf.size() * sizeof(S)); + if (compare(*this, p_) >= 0) { + subNc(v_, v_, p_.v_); + } + assert(compare(*this, p_) < 0); + } + static void initInvTbl(MontFpT *invTbl) + { + MontFpT t(2); + const int n = N * 64 * 2; + for (int i = 0; i < n; i++) { + invTbl[n - 1 - i] = t; + t += t; + } + } + typedef void (*void3op)(MontFpT&, const MontFpT&, const MontFpT&); + typedef bool (*bool3op)(MontFpT&, const MontFpT&, const MontFpT&); + typedef void (*void2op)(MontFpT&, const MontFpT&); + typedef int (*int2op)(MontFpT&, const MontFpT&); +public: + static const size_t BlockSize = N; + typedef uint64_t BlockType; + MontFpT() {} + MontFpT(int x) { operator=(x); } + MontFpT(uint64_t x) { operator=(x); } + explicit MontFpT(const std::string& str, int base = 0) + { + fromStr(str, base); + } + MontFpT& operator=(int x) + { + if (x == 0) { + clear(); + } else { + v_[0] = abs(x); + for (size_t i = 1; i < N; i++) v_[i] = 0; + mul(*this, *this, RR_); + if (x < 0) { + neg(*this, *this); + } + } + return *this; + } + MontFpT& operator=(uint64_t x) + { + v_[0] = x; + for (size_t i = 1; i < N; i++) v_[i] = 0; + mul(*this, *this, RR_); + return *this; + } + void fromStr(const std::string& str, int base = 0) + { + bool isMinus; + const char *p = fp::verifyStr(&isMinus, &base, str); + + if (base == 16) { + MontFpT t; + mcl::fp::fromStr16(t.v_, N, p, str.size() - (p - str.c_str())); + if (compare(t, p_) >= 0) throw cybozu::Exception("fp:MontFpT:str is too large") << str; + mul(*this, t, RR_); + } else { + mpz_class t; + if (!Gmp::fromStr(t, p, base)) { + throw cybozu::Exception("fp:MontFpT:fromStr") << str; + } + toMont(*this, t); + } + if (isMinus) { + neg(*this, *this); + } + } + void put() const + { + for (int i = N - 1; i >= 0; i--) { + printf("%016llx ", v_[i]); + } + printf("\n"); + } + void set(const std::string& str, int base = 0) { fromStr(str, base); } + void toStr(std::string& str, int base = 10, bool withPrefix = false) const + { + if (isZero()) { + str = "0"; + return; + } + if (base == 16 || base == 2) { + MontFpT t; + mul(t, *this, one_); + if (base == 16) { + mcl::fp::toStr16(str, t.v_, N, withPrefix); + } else { + mcl::fp::toStr2(str, t.v_, N, withPrefix); + } + return; + } + if (base != 10) throw cybozu::Exception("fp:MontFpT:toStr:bad base") << base; + // QQQ : remove conversion to gmp + mpz_class t; + fromMont(t, *this); + Gmp::toStr(str, t, base); + } + std::string toStr(int base = 10, bool withPrefix = false) const + { + std::string str; + toStr(str, base, withPrefix); + return str; + } + void clear() + { + for (size_t i = 0; i < N; i++) v_[i] = 0; + } + template + void setRand(RG& rg) + { + fp::getRandVal(v_, rg, p_.v_, modBitLen_); + } + template + void setRaw(const S *inBuf, size_t n) + { + n = std::min(n, fp::getRoundNum(modBitLen_)); + if (n == 0) { + clear(); + return; + } + std::vector buf(inBuf, inBuf + n); + setMaskMod(buf); + } + static inline void setModulo(const std::string& pstr, int base = 0) + { + bool isMinus; + const char *p = fp::verifyStr(&isMinus, &base, pstr); + if (isMinus) throw cybozu::Exception("MontFp:setModulo:mstr is not pinus") << pstr; + if (!Gmp::fromStr(pOrg_, p, base)) { + throw cybozu::Exception("fp:MontFpT:setModulo") << pstr << base; + } + modBitLen_ = Gmp::getBitLen(pOrg_); + if (fp::getRoundNum(modBitLen_) > N) { + throw cybozu::Exception("MontFp:setModulo:bad prime length") << pstr; + } + p_.fromRawGmp(pOrg_); + sq_.set(pOrg_); + + mpz_class t = 1; + one_.fromRawGmp(t); + t = (t << (N * 64)) % pOrg_; + R_.fromRawGmp(t); + t = (t * t) % pOrg_; + RR_.fromRawGmp(t); + fg_.init(p_.v_, N); + add = Xbyak::CastTo(fg_.add_); + sub = Xbyak::CastTo(fg_.sub_); + mul = Xbyak::CastTo(fg_.mul_); + square = Xbyak::CastTo(fg_.sqr_); + if (square == 0) square = squareC; + neg = Xbyak::CastTo(fg_.neg_); + shr1 = Xbyak::CastTo(fg_.shr1_); + addNc = Xbyak::CastTo(fg_.addNc_); + subNc = Xbyak::CastTo(fg_.subNc_); + preInv = Xbyak::CastTo(fg_.preInv_); + initInvTbl(invTbl_); + } + static inline void getModulo(std::string& pstr) + { + Gmp::toStr(pstr, pOrg_); + } + static inline bool isYodd(const MontFpT& y) + { +#if 0 + return (y.v_[0] & 1) == 1; +#else + MontFpT t; // QQQ : is necessary? + mul(t, y, one_); + return (t.v_[0] & 1) == 1; +#endif + } + static inline bool squareRoot(MontFpT& y, const MontFpT& x) + { + mpz_class t; + fromMont(t, x); + if (!sq_.get(t, t)) return false; + toMont(y, t); + return true; + } + static inline void fromMont(mpz_class& z, const MontFpT& x) + { + MontFpT t; + mul(t, x, one_); + Gmp::setRaw(z, t.v_, N); + } + static inline void toMont(MontFpT& z, const mpz_class& x) + { + if (x >= pOrg_) throw cybozu::Exception("fp:MontFpT:toMont:large x") << x; + MontFpT t; + t.fromRawGmp(x); + mul(z, t, RR_); + } + static void3op add; + static void3op sub; + static void3op mul; + static void2op square; + static void2op neg; + static void2op shr1; + static bool3op addNc; + static bool3op subNc; + static int2op preInv; + static inline void squareC(MontFpT& z, const MontFpT& x) + { + mul(z, x, x); + } + static inline int preInvC(MontFpT& r, const MontFpT& x) + { + MontFpT u, v, s; + u = p_; + v = x; + r.clear(); + s.clear(); s.v_[0] = 1; // s is real 1 + int k = 0; + // u, v : Pack, r, s : mem + bool rTop = false; + LP: + if (v.isZero()) goto EXIT; + if ((u.v_[0] & 1) == 0) { + goto U_EVEN; + } + if ((v.v_[0] & 1) == 0) { + goto V_EVEN; + } + if (compare(v, u) < 0) { + goto V_LT_U; + } + subNc(v, v, u); // sub_rr + addNc(s, s, r); // add_mm + V_EVEN: + shr1(v, v); // shr1_r + rTop = addNc(r, r, r); // twice_m + k++; + goto LP; + V_LT_U: + subNc(u, u, v); // sub_rr + rTop = addNc(r, r, s); // add_mm + U_EVEN: + shr1(u, u); // shr1_r + addNc(s, s, s); // twice_m + k++; + goto LP; + EXIT:; + if (rTop) subNc(r, r, p_); + if (subNc(r, p_, r)) { + addNc(r, r, p_); + } + return k; + } + static inline void inv(MontFpT& z, const MontFpT& x) + { +#if 1 + MontFpT r; +#if 1 + int k = preInv(r, x); +#else + MontFpT s; + int h = preInvC(s, x); + int k = preInv(r, x); + if (r != s || k != h) { + std::cout << std::hex; + PUT(x); + PUT(r); + PUT(s); + printf("k=%d, h=%d\n", k, h); + exit(1); + } +#endif + /* + xr = 2^k + R = 2^(N * 64) + get r2^(-k)R^2 = r 2^(N * 64 * 2 - k) + */ + mul(z, r, invTbl_[k]); +#else + mpz_class t; + fromMont(t, x); + Gmp::invMod(t, t, pOrg_); + toMont(z, t); +#endif + } + static inline void div(MontFpT& z, const MontFpT& x, const MontFpT& y) + { + MontFpT ry; + inv(ry, y); + mul(z, x, ry); + } +#if 0 + static inline BlockType getBlock(const MontFpT& x, size_t i) + { + return Gmp::getBlock(x.v, i); + } + static inline const BlockType *getBlock(const MontFpT& x) + { + return Gmp::getBlock(x.v); + } + static inline size_t getBlockSize(const MontFpT& x) + { + return Gmp::getBlockSize(x.v); + } + static inline void shr(MontFpT& z, const MontFpT& x, size_t n) + { + z.v = x.v >> n; + } +#endif + /* + append to bv(not clear bv) + */ + void appendToBitVec(cybozu::BitVector& bv) const + { + MontFpT t; + MontFpT::mul(t, *this, MontFpT::one_); + bv.append(t.v_, modBitLen_); + } + void fromBitVec(const cybozu::BitVector& bv) + { + const size_t bitLen = bv.size(); + if (bitLen != modBitLen_) throw cybozu::Exception("MontFp:fromBitVec:bad size") << bitLen << modBitLen_; + const size_t blockN = cybozu::RoundupBit(bitLen); + const MontFpT* src; + MontFpT t; + if (blockN == N) { + src = (const MontFpT*)bv.getBlock(); + } else { + cybozu::CopyBit(t.v_, bv.getBlock(), bitLen); + for (size_t i = blockN; i < N; i++) t.v_[i] = 0; + src = &t; + } + mul(*this, *src, RR_); + if (compare(*this, p_) >= 0) { + throw cybozu::Exception("MontFpT:fromBitVec:large x") << *this << p_; + } + } + static inline size_t getBitVecSize() { return modBitLen_; } + static inline int compare(const MontFpT& x, const MontFpT& y) + { + return fp::compareArray(x.v_, y.v_, N); + } + static inline bool isZero(const MontFpT& x) + { + if (x.v_[0]) return false; + uint64_t r = 0; + for (size_t i = 1; i < N; i++) { + r |= x.v_[i]; + } + return r == 0; + } + bool isZero() const { return isZero(*this); } + template + static void power(MontFpT& z, const MontFpT& x, const Z& y) + { + power_impl::power(z, x, y); + } + const uint64_t* getInnerValue() const { return v_; } + bool operator==(const MontFpT& rhs) const { return compare(*this, rhs) == 0; } + bool operator!=(const MontFpT& rhs) const { return compare(*this, rhs) != 0; } + static inline size_t getModBitLen() { return modBitLen_; } + static inline uint64_t cvtInt(const MontFpT& x, bool *err = 0) + { + MontFpT t; + mul(t, x, one_); + for (size_t i = 1; i < N; i++) { + if (t.v_[i]) { + if (err) { + *err = true; + return 0; + } else { + throw cybozu::Exception("MontFp:cvtInt:too large") << x; + } + } + } + if (err) *err = false; + return t.v_[0]; + } + uint64_t cvtInt(bool *err = 0) const { return cvtInt(*this, err); } +}; + +templatempz_class MontFpT::pOrg_; +templatemcl::SquareRoot MontFpT::sq_; +templateMontFpT MontFpT::p_; +templateMontFpT MontFpT::one_; +templateMontFpT MontFpT::R_; +templateMontFpT MontFpT::RR_; +templateMontFpT MontFpT::invTbl_[N * 64 * 2]; +templateFpGenerator MontFpT::fg_; +templatesize_t MontFpT::modBitLen_; + +templatetypename MontFpT::void3op MontFpT::add; +templatetypename MontFpT::void3op MontFpT::sub; +templatetypename MontFpT::void3op MontFpT::mul; +templatetypename MontFpT::void2op MontFpT::square; +templatetypename MontFpT::void2op MontFpT::neg; +templatetypename MontFpT::void2op MontFpT::shr1; +templatetypename MontFpT::bool3op MontFpT::addNc; +templatetypename MontFpT::bool3op MontFpT::subNc; +templatetypename MontFpT::int2op MontFpT::preInv; + +} // mcl + +namespace std { CYBOZU_NAMESPACE_TR1_BEGIN +template struct hash; + +template +struct hash > { + size_t operator()(const mcl::MontFpT& x, uint64_t v = 0) const + { + return static_cast(cybozu::hash64(x.getInnerValue(), N, v)); + } +}; + +CYBOZU_NAMESPACE_TR1_END } // std::tr1 diff --git a/include/mcl/operator.hpp b/include/mcl/operator.hpp new file mode 100644 index 0000000..f5d0df3 --- /dev/null +++ b/include/mcl/operator.hpp @@ -0,0 +1,118 @@ +#pragma once +/** + @file + @brief operator + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include +#include + +#ifdef _WIN32 + #ifndef MCL_FORCE_INLINE + #define MCL_FORCE_INLINE __forceinline + #endif + #pragma warning(push) + #pragma warning(disable : 4714) +#else + #ifndef MCL_FORCE_INLINE + #define MCL_FORCE_INLINE __attribute__((always_inline)) + #endif +#endif + +namespace mcl { namespace ope { + +template +struct Empty {}; + +/* + T must have compare +*/ +template > +struct comparable : E { + friend MCL_FORCE_INLINE bool operator<(const T& x, const T& y) { return T::compare(x, y) < 0; } + friend MCL_FORCE_INLINE bool operator>=(const T& x, const T& y) { return !operator<(x, y); } + + friend MCL_FORCE_INLINE bool operator>(const T& x, const T& y) { return T::compare(x, y) > 0; } + friend MCL_FORCE_INLINE bool operator<=(const T& x, const T& y) { return !operator>(x, y); } + friend MCL_FORCE_INLINE bool operator==(const T& x, const T& y) { return T::compare(x, y) == 0; } + friend MCL_FORCE_INLINE bool operator!=(const T& x, const T& y) { return !operator==(x, y); } +}; + +/* + T must have add, sub +*/ +template > +struct addsub : E { + template MCL_FORCE_INLINE T& operator+=(const S& rhs) { T::add(static_cast(*this), static_cast(*this), rhs); return static_cast(*this); } + template MCL_FORCE_INLINE T& operator-=(const S& rhs) { T::sub(static_cast(*this), static_cast(*this), rhs); return static_cast(*this); } + template friend MCL_FORCE_INLINE T operator+(const T& a, const S& b) { T c; T::add(c, a, b); return c; } + template friend MCL_FORCE_INLINE T operator-(const T& a, const S& b) { T c; T::sub(c, a, b); return c; } +}; + +/* + T must have mul +*/ +template > +struct mulable : E { + template MCL_FORCE_INLINE T& operator*=(const S& rhs) { T::mul(static_cast(*this), static_cast(*this), rhs); return static_cast(*this); } + template friend MCL_FORCE_INLINE T operator*(const T& a, const S& b) { T c; T::mul(c, a, b); return c; } +}; + +/* + T must have inv, mul +*/ +template > +struct invertible : E { + MCL_FORCE_INLINE T& operator/=(const T& rhs) { T c; T::inv(c, rhs); T::mul(static_cast(*this), static_cast(*this), c); return static_cast(*this); } + friend MCL_FORCE_INLINE T operator/(const T& a, const T& b) { T c; T::inv(c, b); T::mul(c, c, a); return c; } +}; + +/* + T must have neg +*/ +template > +struct hasNegative : E { + MCL_FORCE_INLINE T operator-() const { T c; T::neg(c, static_cast(*this)); return c; } +}; + +template > +struct hasIO : E { + friend inline std::ostream& operator<<(std::ostream& os, const T& self) + { + const std::ios_base::fmtflags f = os.flags(); + if (f & std::ios_base::oct) throw cybozu::Exception("fpT:operator<<:oct is not supported"); + const int base = (f & std::ios_base::hex) ? 16 : 10; + const bool showBase = (f & std::ios_base::showbase) != 0; + std::string str; + self.toStr(str, base, showBase); + return os << str; + } + friend inline std::istream& operator>>(std::istream& is, T& self) + { + const std::ios_base::fmtflags f = is.flags(); + if (f & std::ios_base::oct) throw cybozu::Exception("fpT:operator>>:oct is not supported"); + const int base = (f & std::ios_base::hex) ? 16 : 0; + std::string str; + is >> str; + self.fromStr(str, base); + return is; + } +}; + +template +struct Optimized { + bool hasMulMod() const { return false; } + void init(const T&) {} + static void mulMod(T&, const T&, const T&) {} + static void mulMod(T&, const T&, unsigned int) {} + bool hasPowMod() const { return false; } + static void powMod(T&, const T&, const T&, const T&) {} +}; + +} } // mcl::ope + +#ifdef _WIN32 +// #pragma warning(pop) +#endif diff --git a/include/mcl/power.hpp b/include/mcl/power.hpp new file mode 100644 index 0000000..27fd15e --- /dev/null +++ b/include/mcl/power.hpp @@ -0,0 +1,181 @@ +#pragma once +/** + @file + @brief power + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include +#include +#include +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4616) + #pragma warning(disable : 4800) + #pragma warning(disable : 4244) + #pragma warning(disable : 4127) + #pragma warning(disable : 4512) + #pragma warning(disable : 4146) +#endif +#include +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +namespace mcl { + +namespace power_impl { + +template +struct TagInt { + typedef typename F::BlockType BlockType; + static size_t getBlockSize(const F& x) + { + return F::getBlockSize(x); + } + static BlockType getBlock(const F& x, size_t i) + { + return F::getBlock(x, i); + } + static const BlockType* getBlock(const F& x) + { + return F::getBlock(x); + } + static size_t getBitLen(const F& x) + { + return F::getBitLen(x); + } + static void shr(F& x, size_t n) + { + F::shr(x, x, n); + } +}; + +template<> +struct TagInt { + typedef int BlockType; + static int getBlockSize(int) + { + return 1; + } + static BlockType getBlock(int x, size_t i) + { + assert(i == 0); + cybozu::disable_warning_unused_variable(i); + return x; + } + static const BlockType* getBlock(const int& x) + { + return &x; + } + static size_t getBitLen(int x) + { + return x == 0 ? 1 : cybozu::bsr(x) + 1; + } + static void shr(int& x, size_t n) + { + x >>= n; + } +}; + +template<> +struct TagInt { + typedef size_t BlockType; + static size_t getBlockSize(size_t) + { + return 1; + } + static BlockType getBlock(size_t x, size_t i) + { + assert(i == 0); + cybozu::disable_warning_unused_variable(i); + return x; + } + static const BlockType* getBlock(const size_t& x) + { + return &x; + } + static size_t getBitLen(size_t x) + { + return x == 0 ? 1 : cybozu::bsr(x) + 1; + } + static void shr(size_t& x, size_t n) + { + x >>= n; + } +}; + +template<> +struct TagInt { + typedef mp_limb_t BlockType; + static size_t getBlockSize(const mpz_class& x) + { + return x.get_mpz_t()->_mp_size; + } + static BlockType getBlock(const mpz_class& x, size_t i) + { + return x.get_mpz_t()->_mp_d[i]; + } + static const BlockType* getBlock(const mpz_class& x) + { + return x.get_mpz_t()->_mp_d; + } + static size_t getBitLen(const mpz_class& x) + { + return mpz_sizeinbase(x.get_mpz_t(), 2); + } + static void shr(mpz_class& x, size_t n) + { + x >>= n; + } +}; + +template +void powerArray(G& z, const G& x, const BlockType *y, size_t n) +{ + typedef TagMultiGr TagG; + G out; + TagG::init(out); + G t(x); + for (size_t i = 0; i < n; i++) { + BlockType v = y[i]; + int m = (int)sizeof(BlockType) * 8; + if (i == n - 1) { + // avoid unused multiplication + while (m > 0 && (v & (BlockType(1) << (m - 1))) == 0) { + m--; + } + } + for (int j = 0; j < m; j++) { + if (v & (BlockType(1) << j)) { + TagG::mul(out, out, t); + } + TagG::square(t, t); + } + } + z = out; +} + +template +void power(G& z, const G& x, const F& _y) +{ + typedef TagMultiGr TagG; + typedef power_impl::TagInt TagI; + if (_y == 0) { + TagG::init(z); + return; + } + if (_y == 1) { + z = x; + return; + } + bool isNegative = _y < 0; + const F& y = isNegative ? -_y : _y; + powerArray(z, x, TagI::getBlock(y), TagI::getBlockSize(y)); + if (isNegative) { + TagG::inv(z, z); + } +} + +} } // mcl::power_impl diff --git a/include/mcl/tagmultigr.hpp b/include/mcl/tagmultigr.hpp new file mode 100644 index 0000000..51add36 --- /dev/null +++ b/include/mcl/tagmultigr.hpp @@ -0,0 +1,39 @@ +#pragma once +/** + @file + @brief TagMultiGr + @author MITSUNARI Shigeo(@herumi) + @license modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#include + +namespace mcl { + +// default tag is for multiplicative group +template +struct TagMultiGr { + static void square(G& z, const G& x) + { + G::mul(z, x, x); + } + static void mul(G& z, const G& x, const G& y) + { + G::mul(z, x, y); + } + static void inv(G& z, const G& x) + { + G::inv(z, x); + } + static void div(G& z, const G& x, const G& y) + { + G::div(z, x, y); + } + static void init(G& x) + { + x = 1; + } +}; + +} // mcl + diff --git a/mcl.sln b/mcl.sln new file mode 100644 index 0000000..e0edbbb --- /dev/null +++ b/mcl.sln @@ -0,0 +1,25 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Express 2012 for Windows Desktop +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fp_test", "test\proj\fp_test\fp_test.vcxproj", "{51266DE6-B57B-4AE3-B85C-282F170E1728}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ec_test", "test\proj\ec_test\ec_test.vcxproj", "{46B6E88E-739A-406B-9F68-BC46C5950FA3}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {51266DE6-B57B-4AE3-B85C-282F170E1728}.Debug|x64.ActiveCfg = Debug|x64 + {51266DE6-B57B-4AE3-B85C-282F170E1728}.Debug|x64.Build.0 = Debug|x64 + {51266DE6-B57B-4AE3-B85C-282F170E1728}.Release|x64.ActiveCfg = Release|x64 + {51266DE6-B57B-4AE3-B85C-282F170E1728}.Release|x64.Build.0 = Release|x64 + {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Debug|x64.ActiveCfg = Debug|x64 + {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Debug|x64.Build.0 = Debug|x64 + {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Release|x64.ActiveCfg = Release|x64 + {46B6E88E-739A-406B-9F68-BC46C5950FA3}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/release.props b/release.props new file mode 100644 index 0000000..88b0830 --- /dev/null +++ b/release.props @@ -0,0 +1,12 @@ + + + + + + + + MultiThreadedDLL + + + + \ No newline at end of file diff --git a/sample/Makefile b/sample/Makefile new file mode 100644 index 0000000..96678a6 --- /dev/null +++ b/sample/Makefile @@ -0,0 +1,23 @@ +include ../common.mk + +TARGET=$(TEST_FILE) +LIBS= + +SRC=$(wildcard *.cpp) + +all: $(TARGET) + +test: $(TARGET) + @$(SAMPLE_TEST) + +$(OBJDIR): + @$(MKDIR) $(OBJDIR) + +clean: + $(CLEAN) + +$(LIBS): + $(MAKE) -C ../src + +-include $(DEPEND_FILE) + diff --git a/sample/ecdh_smpl.cpp b/sample/ecdh_smpl.cpp new file mode 100644 index 0000000..91bb9f6 --- /dev/null +++ b/sample/ecdh_smpl.cpp @@ -0,0 +1,69 @@ +/* + sample of Elliptic Curve Diffie-Hellman key sharing +*/ +#include +#include +#include +#include +#include +#include +#include +#include +typedef mcl::FpT<> Fp; + +struct ZnTag; + +typedef mcl::EcT Ec; +typedef mcl::FpT Zn; + +int main() +{ + cybozu::RandomGenerator rg; + /* + system setup with a parameter secp192k1 recommended by SECG + Ec is an elliptic curve over Fp + the cyclic group of

is isomorphic to Zn + */ + const mcl::EcParam& para = mcl::ecparam::secp192k1; + Zn::setModulo(para.n); + Fp::setModulo(para.p); + Ec::setParam(para.a, para.b); + const Ec P(Fp(para.gx), Fp(para.gy)); + + /* + Alice setups a private key a and public key aP + */ + Zn a; + Ec aP; + + a.setRand(rg); + Ec::power(aP, P, a); // aP = a * P; + + std::cout << "aP=" << aP << std::endl; + + /* + Bob setups a private key b and public key bP + */ + Zn b; + Ec bP; + + b.setRand(rg); + Ec::power(bP, P, b); // bP = b * P; + + std::cout << "bP=" << bP << std::endl; + + Ec abP, baP; + + // Alice uses bP(B's public key) and a(A's priavte key) + Ec::power(abP, bP, a); // abP = a * (bP) + + // Bob uses aP(A's public key) and b(B's private key) + Ec::power(baP, aP, b); // baP = b * (aP) + + if (abP == baP) { + std::cout << "key sharing succeed:" << abP << std::endl; + } else { + std::cout << "ERR(not here)" << std::endl; + } +} + diff --git a/sample/random_smpl.cpp b/sample/random_smpl.cpp new file mode 100644 index 0000000..19944de --- /dev/null +++ b/sample/random_smpl.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +#include +#include +#include +typedef mcl::FpT<> Fp; + +typedef std::map Map; + +int main(int argc, char *argv[]) +{ + cybozu::RandomGenerator rg; + const char *p = mcl::ecparam::secp192k1.p; + if (argc == 2) { + p = argv[1]; + } + Fp::setModulo(p); + Fp x; + printf("p=%s\n", p); + Map m; + for (int i = 0; i < 10000; i++) { + x.setRand(rg); + m[x.toStr(16)]++; + } + for (Map::const_iterator i = m.begin(), ie = m.end(); i != ie; ++i) { + printf("%s %d\n", i->first.c_str(), i->second); + } +} diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..37c0873 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,42 @@ +VER=-3.5 +LLC=llc$(VER) +OPT=opt$(VER) +DIS=llvm-dis$(VER) +ASM=llvm-as$(VER) +OPT_LLC= $(OPT) -O3 -o - | $(LLC) -O3 -o - + +SRC = once.txt all.txt long.txt short.txt mul.txt +TARGET=x64.s x86.s arm.s arm64.s +AFLAGS=-mattr=bmi2 +all: $(TARGET) + +base64.ll: gen.py $(SRC) + python gen.py 64 + +base32.ll: gen.py $(SRC) + python gen.py 32 + +x64: base64.ll + $(LLC) base64.ll -o - -x86-asm-syntax=intel +x86: base32.ll + $(LLC) base32.ll -o - -x86-asm-syntax=intel -march=x86 +arm64: base64.ll + $(LLC) base64.ll -o - -march=aarch64 + +arm: base32.ll + $(LLC) base32.ll -o - -march=arm + +opt: base64.ll + cat base64.ll|$(OPT_LLC) -x86-asm-syntax=intel $(AFLAGS) + +x64.s: base64.ll + cat base64.ll|$(OPT_LLC) $(AFLAGS) > x64.s +x86.s: base32.ll + cat base32.ll|$(OPT_LLC) $(AFLAGS) -march=x86 > x86.s +arm.s: base32.ll + cat base32.ll|$(OPT_LLC) -march=arm > arm.s +arm64.s: base32.ll + cat base64.ll|$(OPT_LLC) -march=aarch64 > arm64.s +clean: + rm -rf base*.ll *.s + diff --git a/src/all.txt b/src/all.txt new file mode 100644 index 0000000..0dbd4f9 --- /dev/null +++ b/src/all.txt @@ -0,0 +1,7 @@ +declare { i$(bit), i1 } @llvm.usub.with.overflow.i$(bit)(i$(bit) %x, i$(bit) %y) + +define i$(unit) @extract$(bit+unit)(i$(bit+unit) %x, i$(bit+unit) %shift) { + %t0 = lshr i$(bit+unit) %x, %shift + %t1 = trunc i$(bit+unit) %t0 to i$(unit) + ret i$(unit) %t1 +} diff --git a/src/gen.py b/src/gen.py new file mode 100644 index 0000000..acdd1ab --- /dev/null +++ b/src/gen.py @@ -0,0 +1,187 @@ +import sys, re + +# @for , , +RE_FOR = re.compile(r'@for\s+(\w+)\s*,\s*([^ ]+)\s*,\s*([^ ]+)') +# $() +RE_VAL = re.compile(r'\$\(([^)]+)\)') +# @define = +RE_DEFINE = re.compile(r'@define\s+(\w+)\s*=(.*)') +# @if +RE_IF = re.compile(r'@if\s+(.*)') +# @elif +RE_ELIF = re.compile(r'@elif\s+(.*)') + +def evalStr(s, envG, envL={}): + def eval2str(x): + s = x.group(1) + v = eval(s, envG, envL) + return str(v) + s = RE_VAL.sub(eval2str, s) + return s + +def parseDefine(s, envG, envL): + """ + if s is @define statement, then update envL and return True + otherwise return False + """ + p = RE_DEFINE.match(s) + if not p: + return False + lhs = p.group(1).strip() + rhs = p.group(2).strip() + envL[lhs] = eval(rhs, envG, envL) + return True + +def parseFor(s, envG): + """ + @for i, 0, 3 + + @endif + + | + v + @define i = 0 + + exp + @define i = 1 + + @define i = 2 + + + """ + out = "" + inFor = False + envL = {} + for line in s.split('\n'): + stripped = line.strip() + # save @define for parseIf + parseDefine(stripped, envG, envL) + if inFor: + if line.strip() == '@endfor': + inFor = False + for i in xrange(b, e): + out += "@define %s = %d\n" % (v, i) + out += sub + else: + sub += line + '\n' + else: + p = RE_FOR.search(stripped) + if p: + v = p.group(1).strip() + b = eval(p.group(2), envG) + e = eval(p.group(3), envG) + sub = "" + inFor = True + else: + out += line + '\n' + return out + +def parseIf(s, envG): + out = "" + IF_INIT = 0 + IF_IF = 1 + IF_ELSE = 2 + ifState = IF_INIT + ifVar = False + # available variables in @() + envL = {} + def evalIntLoc(s): + return eval(s, envG, envL) + for line in s.split('\n'): + stripped = line.strip() + # remove @define + if parseDefine(stripped, envG, envL): + continue + if ifState == IF_INIT: + p = RE_IF.match(stripped) + if p: + ifState = IF_IF + ifVar = evalIntLoc(p.group(1)) + continue + elif ifState == IF_IF: + if stripped == '@endif': + ifState = IF_INIT + continue + elif stripped == '@else': + ifState = IF_ELSE + ifVar = not ifVar + continue + p = RE_ELIF.match(stripped) + if p: + ifVar = evalIntLoc(p.group(1)) + continue + if not ifVar: + continue + elif ifState == IF_ELSE: + if stripped == '@endif': + ifState = IF_INIT + continue + if not ifVar: + continue + else: + raise Exception('bad state', ifState) + out += evalStr(line, envG, envL) + '\n' + return out + +def parse(s, unitL, bitL): + """ + eval "@()" to integer + + @for , , + ... + @endfor + + REMARK : @for is not nestable + + @define = + REMARK : var is global + + @if + @elif + @endif + + REMARK : @if is not nestable + """ + # available variables in @() + envG = { + 'unit' : unitL, + 'bit' : bitL, + 'N' : bitL / unitL, + } + s = parseFor(s, envG) + s = parseIf(s, envG) + return s + +def gen(fo, inLame, unitL, bitLL): + fi = open(inLame, 'r') + s = fi.read() + fi.close() + for bitL in bitLL: + t = parse(s, unitL, bitL) + fo.write(t) + +def main(): + argv = sys.argv + args = len(argv) + unitL = 64 + if args == 2: + unitL = int(argv[1]) + if unitL not in [32, 64]: + print "bad unitL", unitL + exit(1) + + outLame = 'base%d.ll' % unitL + fo = open(outLame, 'w') +# gen(fo, 't.txt', unitL, [unitL * 4]) +# exit(1) + gen(fo, 'once.txt', unitL, [unitL * 2]) + + bitLL = range(unitL, 576 + 1, unitL) + gen(fo, 'all.txt', unitL, bitLL) + gen(fo, 'short.txt', unitL, bitLL) + gen(fo, 'long.txt', unitL, bitLL) + gen(fo, 'mul.txt', unitL, bitLL[1:]) + fo.close() + +if __name__ == "__main__": + main() diff --git a/src/long.txt b/src/long.txt new file mode 100644 index 0000000..31082a7 --- /dev/null +++ b/src/long.txt @@ -0,0 +1,54 @@ +define void @mcl_fp_add$(bit)L(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) { + %x = load i$(bit)* %px + %y = load i$(bit)* %py + %p = load i$(bit)* %pp + %x1 = zext i$(bit) %x to i$(bit+unit) + %y1 = zext i$(bit) %y to i$(bit+unit) + %p1 = zext i$(bit) %p to i$(bit+unit) + %t0 = add i$(bit+unit) %x1, %y1 ; x + y + %t1 = trunc i$(bit+unit) %t0 to i$(bit) + store i$(bit) %t1, i$(bit)* %pz + %vc = sub i$(bit+unit) %t0, %p1 + %c = lshr i$(bit+unit) %vc, $(bit+unit-1) + %c1 = trunc i$(bit+unit) %c to i1 + br i1 %c1, label %carry, label %nocarry +nocarry: + %v = trunc i$(bit+unit) %vc to i$(bit) + store i$(bit) %v, i$(bit)* %pz + ret void +carry: + ret void +} + +define internal { i$(bit), i$(unit) } @local_sbb$(bit)(i$(bit) %x, i$(bit) %y) { + %x1 = zext i$(bit) %x to i$(bit+unit) + %y1 = zext i$(bit) %y to i$(bit+unit) + %v1 = sub i$(bit+unit) %x1, %y1 + %v = trunc i$(bit+unit) %v1 to i$(bit) + %c = lshr i$(bit+unit) %v1, $(bit) + %c1 = trunc i$(bit+unit) %c to i$(unit) + %r1 = insertvalue { i$(bit), i$(unit) } undef, i$(bit) %v, 0 + %r2 = insertvalue { i$(bit), i$(unit) } %r1, i$(unit) %c1, 1 + ret { i$(bit), i$(unit) } %r2 +} + +define void @mcl_fp_sub$(bit)L(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) { + %x = load i$(bit)* %px + %y = load i$(bit)* %py + %x1 = zext i$(bit) %x to i$(bit+unit) + %y1 = zext i$(bit) %y to i$(bit+unit) + %vc = sub i$(bit+unit) %x1, %y1 + %v = trunc i$(bit+unit) %vc to i$(bit) + %c = lshr i$(bit+unit) %vc, $(bit+unit-1) + %c1 = trunc i$(bit+unit) %c to i1 + store i$(bit) %v, i$(bit)* %pz + br i1 %c1, label %carry, label %nocarry +nocarry: + ret void +carry: + %p = load i$(bit)* %pp + %t = add i$(bit) %v, %p ; x - y + p + store i$(bit) %t, i$(bit)* %pz + ret void +} + diff --git a/src/mul.txt b/src/mul.txt new file mode 100644 index 0000000..4621c7a --- /dev/null +++ b/src/mul.txt @@ -0,0 +1,81 @@ +@define bu = bit + unit +define private i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y) +@if N > 4 +noinline +@endif +{ +@for i, 0, N + %x$(i) = call i$(unit) @extract$(bit)(i$(bit) %x, i$(bit) $(unit*i)) + %x$(i)y = call i$(unit*2) @mul$(unit)x$(unit)(i$(unit) %x$(i), i$(unit) %y) + %x$(i)y0 = zext i$(unit*2) %x$(i)y to i$(bu) +@endfor +@for i, 1, N + %x$(i)y1 = shl i$(bu) %x$(i)y0, $(unit*i) +@endfor + %t0 = add i$(bu) %x0y0, %x1y1 +@for i, 1, N-1 + %t$(i) = add i$(bu) %t$(i-1), %x$(i+1)y1 +@endfor + ret i$(bu) %t$(N-2) +} +define void @mcl_fp_mul$(bit)pre(i$(unit)* %pz, i$(bit)* %px, i$(bit)* %py) { + %x = load i$(bit)* %px + %y = load i$(bit)* %py +@for i, 0, N + %y$(i) = call i$(unit) @extract$(bit)(i$(bit) %y, i$(bit) $(unit*i)) +@endfor + %sum0 = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y0) + %t0 = trunc i$(bu) %sum0 to i$(unit) + store i$(unit) %t0, i$(unit)* %pz +@for i, 1, N + + %s$(i-1) = lshr i$(bu) %sum$(i-1), $(unit) + %xy$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y$(i)) + %sum$(i) = add i$(bu) %s$(i-1), %xy$(i) + %z$(i) = getelementptr i$(unit)* %pz, i32 $(i) + @if i < N - 1 + %ts$(i) = trunc i$(bu) %sum$(i) to i$(unit) + store i$(unit) %ts$(i), i$(unit)* %z$(i) + @endif +@endfor + %p = bitcast i$(unit)* %z$(N-1) to i$(bu)* + store i$(bu) %sum$(N-1), i$(bu)* %p + ret void +} + +@define bu = bit + unit +@define bu2 = bit + unit * 2 +define void @mcl_fp_mont$(bit)(i$(bit)* %pz, i$(bit)* %px, i$(unit)* %py, i$(bit)* %pp, i$(unit) %r) { + %p = load i$(bit)* %pp + %x = load i$(bit)* %px + +@for i, 0, N + %py$(i) = getelementptr i$(unit)* %py, i$(unit) $(i) + %y$(i) = load i$(unit)* %py$(i) + %xy$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %x, i$(unit) %y$(i)) +@if i == 0 + %a0 = zext i$(bu) %xy0 to i$(bu2) + + %at$(i) = trunc i$(bu) %xy$(i) to i$(unit) +@else + %xye$(i) = zext i$(bu) %xy$(i) to i$(bu2) + %a$(i) = add i$(bu2) %s$(i-1), %xye$(i) + %at$(i) = trunc i$(bu2) %a$(i) to i$(unit) +@endif + %q$(i) = mul i$(unit) %at$(i), %r + %pq$(i) = call i$(bu) @mul$(bit)x$(unit)(i$(bit) %p, i$(unit) %q$(i)) + %pqe$(i) = zext i$(bu) %pq$(i) to i$(bu2) + %t$(i) = add i$(bu2) %a$(i), %pqe$(i) + %s$(i) = lshr i$(bu2) %t$(i), $(unit) +@endfor + %v = trunc i$(bu2) %s$(N-1) to i$(bu) + %pe = zext i$(bit) %p to i$(bu) + %vc = sub i$(bu) %v, %pe + %c = lshr i$(bu) %vc, $(bit) + %c1 = trunc i$(bu) %c to i1 + %z = select i1 %c1, i$(bu) %v, i$(bu) %vc + %zt = trunc i$(bu) %z to i$(bit) + store i$(bit) %zt, i$(bit)* %pz + ret void +} + diff --git a/src/once.txt b/src/once.txt new file mode 100644 index 0000000..501fb2b --- /dev/null +++ b/src/once.txt @@ -0,0 +1,74 @@ + +define i$(unit*2) @mul$(unit)x$(unit)(i$(unit) %x, i$(unit) %y) { + %x0 = zext i$(unit) %x to i$(unit*2) + %y0 = zext i$(unit) %y to i$(unit*2) + %z = mul i$(unit*2) %x0, %y0 + ret i$(unit*2) %z +} + +; NIST_P192 +; 0xfffffffffffffffffffffffffffffffeffffffffffffffff +; +; 0 1 2 +; ffffffffffffffff fffffffffffffffe ffffffffffffffff +; +; p = (1 << 192) - (1 << 64) - 1 +; (1 << 192) % p = (1 << 64) + 1 +; +; L : 192bit +; Hi: 64bit +; x = [H:L] = [H2:H1:H0:L] +; mod p +; x = L + H + (H << 64) +; = L + H + [H1:H0:0] + H2 + (H2 << 64) +;[e:t] = L + H + [H1:H0:H2] + [H2:0] ; 2bit(e) over +; = t + e + (e << 64) + +define internal i64 @extract192to64(i192 %x, i192 %shift) { + %t0 = lshr i192 %x, %shift + %t1 = trunc i192 %t0 to i64 + ret i64 %t1 +} + +define internal void @modNIST_P192(i192* %out, i192* %px) { + %L192 = load i192* %px + %L = zext i192 %L192 to i256 + + %pH = getelementptr i192* %px, i32 1 + %H192 = load i192* %pH + %H = zext i192 %H192 to i256 + + %H10_ = shl i192 %H192, 64 + %H10 = zext i192 %H10_ to i256 + + %H2_ = call i64 @extract192to64(i192 %H192, i192 128) + %H2 = zext i64 %H2_ to i256 + %H102 = or i256 %H10, %H2 + + %H2s = shl i256 %H2, 64 + + %t0 = add i256 %L, %H + %t1 = add i256 %t0, %H102 + %t2 = add i256 %t1, %H2s + + %e = lshr i256 %t2, 192 + %t3 = trunc i256 %t2 to i192 + %e1 = trunc i256 %e to i192 + + + %t4 = add i192 %t3, %e1 + %e2 = shl i192 %e1, 64 + %t5 = add i192 %t4, %e2 + + store i192 %t5, i192* %out + + ret void +} + +define void @mcl_fp_mul_NIST_P192(i192* %pz, i192* %px, i192* %py) { + %buf = alloca i192, i32 2 + %p = bitcast i192* %buf to i$(unit)* + call void @mcl_fp_mul192pre(i$(unit)* %p, i192* %px, i192* %py) + call void @modNIST_P192(i192* %pz, i192* %buf) + ret void +} diff --git a/src/short.txt b/src/short.txt new file mode 100644 index 0000000..931a63f --- /dev/null +++ b/src/short.txt @@ -0,0 +1,46 @@ +define void @mcl_fp_add$(bit)S(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) { +entry: + %x = load i$(bit)* %px + %y = load i$(bit)* %py + %p = load i$(bit)* %pp + %x1 = zext i$(bit) %x to i$(bit+unit) + %y1 = zext i$(bit) %y to i$(bit+unit) + %p1 = zext i$(bit) %p to i$(bit+unit) + %t0 = add i$(bit+unit) %x1, %y1 ; x + y + %t1 = sub i$(bit+unit) %t0, %p1 ; x + y - p + %t2 = lshr i$(bit+unit) %t1, $(bit) + %t3 = trunc i$(bit+unit) %t2 to i1 + %t4 = select i1 %t3, i$(bit+unit) %t0, i$(bit+unit) %t1 + %t5 = trunc i$(bit+unit) %t4 to i$(bit) + store i$(bit) %t5, i$(bit)* %pz + ret void +} + +define internal { i$(bit), i$(unit) } @mcl_local_sbb$(bit)(i$(bit) %x, i$(bit) %y) { + %x1 = zext i$(bit) %x to i$(bit+unit) + %y1 = zext i$(bit) %y to i$(bit+unit) + %v1 = sub i$(bit+unit) %x1, %y1 + %v = trunc i$(bit+unit) %v1 to i$(bit) + %c = lshr i$(bit+unit) %v1, $(bit) + %c1 = trunc i$(bit+unit) %c to i$(unit) + %r1 = insertvalue { i$(bit), i$(unit) } undef, i$(bit) %v, 0 + %r2 = insertvalue { i$(bit), i$(unit) } %r1, i$(unit) %c1, 1 + ret { i$(bit), i$(unit) } %r2 +} + +define void @mcl_fp_sub$(bit)S(i$(bit)* %pz, i$(bit)* %px, i$(bit)* %py, i$(bit)* %pp) { + %x = load i$(bit)* %px + %y = load i$(bit)* %py + %x1 = zext i$(bit) %x to i$(bit+unit) + %y1 = zext i$(bit) %y to i$(bit+unit) + %vc = sub i$(bit+unit) %x1, %y1 + %v = trunc i$(bit+unit) %vc to i$(bit) + %c = lshr i$(bit+unit) %vc, $(bit+unit-1) + %c1 = trunc i$(bit+unit) %c to i1 + %p = load i$(bit)* %pp + %a = select i1 %c1, i$(bit) %p, i$(bit) 0 + %v1 = add i$(bit) %v, %a + store i$(bit) %v1, i$(bit)* %pz + ret void +} + diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..b1e01dd --- /dev/null +++ b/test/Makefile @@ -0,0 +1,42 @@ +include ../common.mk + +ifeq ($(USE_MONT_FP),1) + CFLAGS += -DUSE_MONT_FP +endif + +ifeq ($(USE_LLVM),1) + CFLAGS += -DMIE_USE_LLVM + ASM_SRC=../src/$(CPU).s + ASM_OBJ=$(ASM_SRC:.s=.o) + SRC+=$(ASM_SRC) + LDFLAGS+=$(ASM_OBJ) +endif + +TARGET=$(TEST_FILE) +LIBS= + +SRC=fp_test.cpp ec_test.cpp fp_util_test.cpp +ifeq ($(CPU),x64) + SRC+=fp_generator_test.cpp mont_fp_test.cpp +endif + +all: $(TARGET) + +test: $(TARGET) $(ASM_OBJ) + @$(UNIT_TEST) + +$(OBJDIR): + @$(MKDIR) $(OBJDIR) + +clean: + $(CLEAN) + +$(LIBS): + $(MAKE) -C ../src + +-include $(DEPEND_FILE) + +ifeq ($(USE_LLVM),1) +$(ASM_OBJ): $(ASM_SRC) + $(CXX) $< -o $@ -c +endif diff --git a/test/base_test.cpp b/test/base_test.cpp new file mode 100644 index 0000000..29a177f --- /dev/null +++ b/test/base_test.cpp @@ -0,0 +1,392 @@ +#include +#define MCL_USE_LLVM +#include +#include +#include +#include +#include +#include +#include + +#include +#if (CYBOZU_HOST == CYBOZU_HOST_INTEL) && (CYBOZU_OS_BIT == 64) + #define USE_XBYAK + static mcl::FpGenerator fg; +#endif +#define PUT(x) std::cout << #x "=" << (x) << std::endl + +const size_t MAX_N = 32; +typedef mcl::fp::Unit Unit; + +size_t getUnitN(size_t bitLen) +{ + return (bitLen + sizeof(Unit) * 8 - 1) / (sizeof(Unit) * 8); +} + +void setMpz(mpz_class& mx, const Unit *x, size_t n) +{ + mcl::Gmp::setRaw(mx, x, n); +} +void getMpz(Unit *x, size_t n, const mpz_class& mx) +{ + mcl::fp::local::toArray(x, n, mx.get_mpz_t()); +} + +struct Montgomery { + mpz_class p_; + mpz_class R_; // (1 << (n_ * 64)) % p + mpz_class RR_; // (R * R) % p + Unit r_; // p * r = -1 mod M = 1 << 64 + size_t n_; + Montgomery() {} + explicit Montgomery(const mpz_class& p) + { + p_ = p; + r_ = mcl::montgomery::getCoff(mcl::Gmp::getBlock(p, 0)); + n_ = mcl::Gmp::getBlockSize(p); + R_ = 1; + R_ = (R_ << (n_ * 64)) % p_; + RR_ = (R_ * R_) % p_; + } + + void toMont(mpz_class& x) const { mul(x, x, RR_); } + void fromMont(mpz_class& x) const { mul(x, x, 1); } + + void mont(Unit *z, const Unit *x, const Unit *y) const + { + mpz_class mx, my; + setMpz(mx, x, n_); + setMpz(my, y, n_); + mul(mx, mx, my); + getMpz(z, n_, mx); + } + void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) const + { +#if 1 + const size_t ySize = mcl::Gmp::getBlockSize(y); + mpz_class c = y == 0 ? mpz_class(0) : x * mcl::Gmp::getBlock(y, 0); + Unit q = c == 0 ? 0 : mcl::Gmp::getBlock(c, 0) * r_; + c += p_ * q; + c >>= sizeof(Unit) * 8; + for (size_t i = 1; i < n_; i++) { + if (i < ySize) { + c += x * mcl::Gmp::getBlock(y, i); + } + Unit q = c == 0 ? 0 : mcl::Gmp::getBlock(c, 0) * r_; + c += p_ * q; + c >>= sizeof(Unit) * 8; + } + if (c >= p_) { + c -= p_; + } + z = c; +#else + z = x * y; + const size_t zSize = mcl::Gmp::getBlockSize(z); + for (size_t i = 0; i < n_; i++) { + if (i < zSize) { + Unit q = mcl::Gmp::getBlock(z, 0) * r_; + z += p_ * (mp_limb_t)q; + } + z >>= sizeof(Unit) * 8; + } + if (z >= p_) { + z -= p_; + } +#endif + } +}; + +void put(const char *msg, const Unit *x, size_t n) +{ + printf("%s ", msg); + for (size_t i = 0; i < n; i++) printf("%016llx ", (long long)x[n - 1 - i]); + printf("\n"); +} +void verifyEqual(const Unit *x, const Unit *y, size_t n, const char *file, int line) +{ + bool ok = mcl::fp::local::isEqualArray(x, y, n); + CYBOZU_TEST_ASSERT(ok); + if (ok) return; + printf("%s:%d\n", file, line); + put("L", x, n); + put("R", y, n); + exit(1); +} +#define VERIFY_EQUAL(x, y, n) verifyEqual(x, y, n, __FILE__, __LINE__) + +void addC(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) +{ + mpz_class mx, my, mp; + setMpz(mx, x, n); + setMpz(my, y, n); + setMpz(mp, p, n); + mx += my; + if (mx >= mp) mx -= mp; + getMpz(z, n, mx); +} +void subC(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) +{ + mpz_class mx, my, mp; + setMpz(mx, x, n); + setMpz(my, y, n); + setMpz(mp, p, n); + mx -= my; + if (mx < 0) mx += mp; + getMpz(z, n, mx); +} +static inline void set_zero(mpz_t& z, Unit *p, size_t n) +{ + z->_mp_alloc = (int)n; + z->_mp_size = 0; + z->_mp_d = (mp_limb_t*)p; +} +static inline void set_mpz_t(mpz_t& z, const Unit* p, int n) +{ + z->_mp_alloc = n; + int i = n; + while (i > 0 && p[i - 1] == 0) { + i--; + } + z->_mp_size = i; + z->_mp_d = (mp_limb_t*)p; +} + +// z[2n] <- x[n] * y[n] +void mulPreC(Unit *z, const Unit *x, const Unit *y, size_t n) +{ +#if 1 + mpz_t mx, my, mz; + set_zero(mz, z, n * 2); + set_mpz_t(mx, x, n); + set_mpz_t(my, y, n); + mpz_mul(mz, mx, my); + mcl::fp::local::toArray(z, n * 2, mz); +#else + mpz_class mx, my; + setMpz(mx, x, n); + setMpz(my, y, n); + mx *= my; + getMpz(z, n * 2, mx); +#endif +} + +void modC(Unit *y, const Unit *x, const Unit *p, size_t n) +{ + mpz_t mx, my, mp; + set_mpz_t(mx, x, n * 2); + set_mpz_t(my, y, n); + set_mpz_t(mp, p, n); + mpz_mod(my, mx, mp); + mcl::fp::local::clearArray(y, my->_mp_size, n); +} + +void mul(Unit *z, const Unit *x, const Unit *y, const Unit *p, size_t n) +{ + Unit ret[MAX_N * 2]; + mpz_t mx, my, mz, mp; + set_zero(mz, ret, MAX_N * 2); + set_mpz_t(mx, x, n); + set_mpz_t(my, y, n); + set_mpz_t(mp, p, n); + mpz_mul(mz, mx, my); + mpz_mod(mz, mz, mp); + mcl::fp::local::toArray(z, n, mz); +} + +typedef mcl::fp::void3op void3op; +typedef mcl::fp::void4op void4op; +typedef mcl::fp::void4Iop void4Iop; + +const struct FuncOp { + size_t bitLen; + void4op addS; + void4op addL; + void4op subS; + void4op subL; + void3op mulPre; + void4Iop mont; +} gFuncOpTbl[] = { + { 128, mcl_fp_add128S, mcl_fp_add128L, mcl_fp_sub128S, mcl_fp_sub128L, mcl_fp_mul128pre, mcl_fp_mont128 }, + { 192, mcl_fp_add192S, mcl_fp_add192L, mcl_fp_sub192S, mcl_fp_sub192L, mcl_fp_mul192pre, mcl_fp_mont192 }, + { 256, mcl_fp_add256S, mcl_fp_add256L, mcl_fp_sub256S, mcl_fp_sub256L, mcl_fp_mul256pre, mcl_fp_mont256 }, + { 320, mcl_fp_add320S, mcl_fp_add320L, mcl_fp_sub320S, mcl_fp_sub320L, mcl_fp_mul320pre, mcl_fp_mont320 }, + { 384, mcl_fp_add384S, mcl_fp_add384L, mcl_fp_sub384S, mcl_fp_sub384L, mcl_fp_mul384pre, mcl_fp_mont384 }, + { 448, mcl_fp_add448S, mcl_fp_add448L, mcl_fp_sub448S, mcl_fp_sub448L, mcl_fp_mul448pre, mcl_fp_mont448 }, + { 512, mcl_fp_add512S, mcl_fp_add512L, mcl_fp_sub512S, mcl_fp_sub512L, mcl_fp_mul512pre, mcl_fp_mont512 }, +#if CYBOZU_OS_BIT == 32 + { 160, mcl_fp_add160S, mcl_fp_add160L, mcl_fp_sub160S, mcl_fp_sub160L, mcl_fp_mul160pre, mcl_fp_mont160 }, + { 224, mcl_fp_add224S, mcl_fp_add224L, mcl_fp_sub224S, mcl_fp_sub224L, mcl_fp_mul224pre, mcl_fp_mont224 }, + { 288, mcl_fp_add288S, mcl_fp_add288L, mcl_fp_sub288S, mcl_fp_sub288L, mcl_fp_mul288pre, mcl_fp_mont288 }, + { 352, mcl_fp_add352S, mcl_fp_add352L, mcl_fp_sub352S, mcl_fp_sub352L, mcl_fp_mul352pre, mcl_fp_mont352 }, + { 416, mcl_fp_add416S, mcl_fp_add416L, mcl_fp_sub416S, mcl_fp_sub416L, mcl_fp_mul416pre, mcl_fp_mont416 }, + { 480, mcl_fp_add480S, mcl_fp_add480L, mcl_fp_sub480S, mcl_fp_sub480L, mcl_fp_mul480pre, mcl_fp_mont480 }, + { 544, mcl_fp_add544S, mcl_fp_add544L, mcl_fp_sub544S, mcl_fp_sub544L, mcl_fp_mul544pre, mcl_fp_mont544 }, +#else + { 576, mcl_fp_add576S, mcl_fp_add576L, mcl_fp_sub576S, mcl_fp_sub576L, mcl_fp_mul576pre, mcl_fp_mont576 }, +#endif +}; + +FuncOp getFuncOp(size_t bitLen) +{ + typedef std::map Map; + static Map map; + static bool init = false; + if (!init) { + init = true; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(gFuncOpTbl); i++) { + map[gFuncOpTbl[i].bitLen] = gFuncOpTbl[i]; + } + } + for (Map::const_iterator i = map.begin(), ie = map.end(); i != ie; ++i) { + if (bitLen <= i->second.bitLen) { + return i->second; + } + } + printf("ERR bitLen=%d\n", (int)bitLen); + exit(1); +} + +void test(const Unit *p, size_t bitLen) +{ + printf("bitLen %d\n", (int)bitLen); + const size_t n = getUnitN(bitLen); +#ifdef NDEBUG + bool doBench = true; +#else + bool doBench = false; +#endif + const FuncOp funcOp = getFuncOp(bitLen); + const void4op addS = funcOp.addS; + const void4op addL = funcOp.addL; + const void4op subS = funcOp.subS; + const void4op subL = funcOp.subL; + const void3op mulPre = funcOp.mulPre; + const void4Iop mont = funcOp.mont; + + mcl::fp::Unit x[MAX_N], y[MAX_N]; + mcl::fp::Unit z[MAX_N], w[MAX_N]; + mcl::fp::Unit z2[MAX_N * 2]; + mcl::fp::Unit w2[MAX_N * 2]; + cybozu::XorShift rg; + mcl::fp::getRandVal(x, rg, p, bitLen); + mcl::fp::getRandVal(y, rg, p, bitLen); + const size_t C = 10; + + addC(z, x, y, p, n); + addS(w, x, y, p); + VERIFY_EQUAL(z, w, n); + for (size_t i = 0; i < C; i++) { + addC(z, y, z, p, n); + addS(w, y, w, p); + VERIFY_EQUAL(z, w, n); + addC(z, y, z, p, n); + addL(w, y, w, p); + VERIFY_EQUAL(z, w, n); + subC(z, x, z, p, n); + subS(w, x, w, p); + VERIFY_EQUAL(z, w, n); + subC(z, x, z, p, n); + subL(w, x, w, p); + VERIFY_EQUAL(z, w, n); + mulPreC(z2, x, z, n); + mulPre(w2, x, z); + VERIFY_EQUAL(z2, w2, n * 2); + } + { + mpz_class mp; + setMpz(mp, p, n); + Montgomery m(mp); +#ifdef USE_XBYAK + if (bitLen > 128) fg.init(p, n); +#endif + /* + real mont + 0 0 + 1 R^-1 + R 1 + -1 -R^-1 + -R -1 + */ + mpz_class t = 1; + const mpz_class R = (t << (n * 64)) % mp; + const mpz_class tbl[] = { + 0, 1, R, mp - 1, mp - R + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const mpz_class& mx = tbl[i]; + for (size_t j = i; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) { + const mpz_class& my = tbl[j]; + getMpz(x, n, mx); + getMpz(y, n, my); + m.mont(z, x, y); + mont(w, x, y, p, m.r_); + VERIFY_EQUAL(z, w, n); +#ifdef USE_XBYAK + if (bitLen > 128) { + fg.mul_(w, x, y); + VERIFY_EQUAL(z, w, n); + } +#endif + } + } + if (doBench) { +// CYBOZU_BENCH("montC", m.mont, x, y, x); + CYBOZU_BENCH("montA ", mont, x, y, x, p, m.r_); + } + } + if (doBench) { +// CYBOZU_BENCH("addS", addS, x, y, x, p); // slow +// CYBOZU_BENCH("subS", subS, x, y, x, p); +// CYBOZU_BENCH("addL", addL, x, y, x, p); +// CYBOZU_BENCH("subL", subL, x, y, x, p); + CYBOZU_BENCH("mulPreA", mulPre, w2, y, x); + CYBOZU_BENCH("mulPreC", mulPreC, w2, y, x, n); + CYBOZU_BENCH("modC ", modC, x, w2, p, n); + } +#ifdef USE_XBYAK + if (bitLen <= 128) return; + if (doBench) { + fg.init(p, n); + CYBOZU_BENCH("addA ", fg.add_, x, y, x); + CYBOZU_BENCH("subA ", fg.sub_, x, y, x); +// CYBOZU_BENCH("mulA", fg.mul_, x, y, x); + } +#endif + printf("mont test %d\n", (int)bitLen); +} + +CYBOZU_TEST_AUTO(all) +{ + const struct { + size_t n; + const uint64_t p[9]; + } tbl[] = { +// { 2, { 0xf000000000000001, 1, } }, + { 2, { 0x000000000000001d, 0x8000000000000000, } }, + { 3, { 0x000000000000012b, 0x0000000000000000, 0x0000000080000000, } }, +// { 3, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x07ffffffffffffff, } }, +// { 3, { 0x7900342423332197, 0x1234567890123456, 0x1480948109481904, } }, + { 3, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0xffffffffffffffff, } }, +// { 4, { 0x7900342423332197, 0x4242342420123456, 0x1234567892342342, 0x1480948109481904, } }, +// { 4, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x17ffffffffffffff, 0x1513423423423415, } }, + { 4, { 0xa700000000000013, 0x6121000000000013, 0xba344d8000000008, 0x2523648240000001, } }, +// { 5, { 0x0000000000000009, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, } }, + { 5, { 0xfffffffffffffc97, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, +// { 6, { 0x4720422423332197, 0x0034230847204720, 0x3456789012345679, 0x4820984290482212, 0x9482094820948209, 0x0194810841094810, } }, +// { 6, { 0x7204224233321972, 0x0342308472047204, 0x4567890123456790, 0x0948204204243123, 0x2098420984209482, 0x2093482094810948, } }, + { 6, { 0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, +// { 7, { 0x0000000000000063, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, } }, + { 7, { 0x000000000fffcff1, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, + { 8, { 0xffffffffffffd0c9, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, } }, + { 9, { 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x00000000000001ff, } }, +// { 9, { 0x4720422423332197, 0x0034230847204720, 0x3456789012345679, 0x2498540975555312, 0x9482904924029424, 0x0948209842098402, 0x1098410948109482, 0x0820958209582094, 0x0000000000000029, } }, +// { 9, { 0x0f69466a74defd8d, 0xfffffffe26f2fc17, 0x7fffffffffffffff, 0x8572938572398583, 0x5732057823857293, 0x9820948205872380, 0x3409238420492034, 0x9483842098340298, 0x0000000000000003, } }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const size_t n = tbl[i].n; + const size_t bitLen = (n - 1) * 64 + cybozu::bsr(tbl[i].p[n - 1]) + 1; + test((const Unit*)tbl[i].p, bitLen); + } +} + diff --git a/test/ec_test.cpp b/test/ec_test.cpp new file mode 100644 index 0000000..1255a1d --- /dev/null +++ b/test/ec_test.cpp @@ -0,0 +1,397 @@ +#define PUT(x) std::cout << #x "=" << (x) << std::endl +#define CYBOZU_TEST_DISABLE_AUTO_RUN +#include +#include +#include + +#include +typedef mcl::FpT<> Fp_3; +typedef mcl::FpT<> Fp_4; +typedef mcl::FpT<> Fp_6; +typedef mcl::FpT<> Fp_9; +#include +#include +#include + +struct tagZn; +typedef mcl::FpT Zn; + +template +struct Test { + typedef mcl::EcT Ec; + const mcl::EcParam& para; + Test(const mcl::EcParam& para) + : para(para) + { + Fp::setModulo(para.p); + Zn::setModulo(para.n); + Ec::setParam(para.a, para.b); +// CYBOZU_TEST_EQUAL(para.bitLen, Fp(-1).getBitLen()); + } + void cstr() const + { + Ec O; + CYBOZU_TEST_ASSERT(O.isZero()); + Ec P; + Ec::neg(P, O); + CYBOZU_TEST_EQUAL(P, O); + } + void ope() const + { + Fp x(para.gx); + Fp y(para.gy); + Zn n = 0; + CYBOZU_TEST_ASSERT(Ec::isValid(x, y)); + Ec P(x, y), Q, R, O; + { + Ec::neg(Q, P); + CYBOZU_TEST_EQUAL(Q.x, P.x); + CYBOZU_TEST_EQUAL(Q.y, -P.y); + + R = P + Q; + CYBOZU_TEST_ASSERT(R.isZero()); + + R = P + O; + CYBOZU_TEST_EQUAL(R, P); + R = O + P; + CYBOZU_TEST_EQUAL(R, P); + } + + { + Ec::dbl(R, P); + Ec R2 = P + P; + CYBOZU_TEST_EQUAL(R, R2); + { + Ec P2 = P; + Ec::dbl(P2, P2); + CYBOZU_TEST_EQUAL(P2, R2); + } + Ec R3L = R2 + P; + Ec R3R = P + R2; + CYBOZU_TEST_EQUAL(R3L, R3R); + { + Ec RR = R2; + RR = RR + P; + CYBOZU_TEST_EQUAL(RR, R3L); + RR = R2; + RR = P + RR; + CYBOZU_TEST_EQUAL(RR, R3L); + RR = P; + RR = RR + RR; + CYBOZU_TEST_EQUAL(RR, R2); + } + Ec::power(R, P, 2); + CYBOZU_TEST_EQUAL(R, R2); + Ec R4L = R3L + R2; + Ec R4R = R2 + R3L; + CYBOZU_TEST_EQUAL(R4L, R4R); + Ec::power(R, P, 5); + CYBOZU_TEST_EQUAL(R, R4L); + } + { + R = P; + for (int i = 0; i < 10; i++) { + R += P; + } + Ec R2; + Ec::power(R2, P, 11); + CYBOZU_TEST_EQUAL(R, R2); + } + Ec::power(R, P, n - 1); + CYBOZU_TEST_EQUAL(R, -P); + R += P; // Ec::power(R, P, n); + CYBOZU_TEST_ASSERT(R.isZero()); + } + + void power() const + { + Fp x(para.gx); + Fp y(para.gy); + Ec P(x, y); + Ec Q; + Ec R; + for (int i = 0; i < 100; i++) { + Ec::power(Q, P, i); + CYBOZU_TEST_EQUAL(Q, R); + R += P; + } + } + + void neg_power() const + { + Fp x(para.gx); + Fp y(para.gy); + Ec P(x, y); + Ec Q; + Ec R; + for (int i = 0; i < 100; i++) { + Ec::power(Q, P, -i); + CYBOZU_TEST_EQUAL(Q, R); + R -= P; + } + } + void squareRoot() const + { + Fp x(para.gx); + Fp y(para.gy); + bool odd = Fp::isYodd(y); + Fp yy; + Ec::getYfromX(yy, x, odd); + CYBOZU_TEST_EQUAL(yy, y); + Fp::neg(y, y); + odd = Fp::isYodd(y); + yy.clear(); + Ec::getYfromX(yy, x, odd); + CYBOZU_TEST_EQUAL(yy, y); + } + void power_fp() const + { + Fp x(para.gx); + Fp y(para.gy); + Ec P(x, y); + Ec Q; + Ec R; + for (int i = 0; i < 100; i++) { + Ec::power(Q, P, Zn(i)); + CYBOZU_TEST_EQUAL(Q, R); + R += P; + } + } + void binaryExpression() const + { + puts("test binaryExpression"); + const Fp x(para.gx); + const Fp y(para.gy); + Ec P(x, y); + Ec Q; + // not compressed + Ec::setCompressedExpression(false); + { + cybozu::BitVector bv; + P.appendToBitVec(bv); + Q.fromBitVec(bv); + CYBOZU_TEST_EQUAL(P, Q); + } + { + P = -P; + cybozu::BitVector bv; + P.appendToBitVec(bv); + Q.fromBitVec(bv); + CYBOZU_TEST_EQUAL(P, Q); + } + P.clear(); + { + cybozu::BitVector bv; + P.appendToBitVec(bv); + Q.fromBitVec(bv); + CYBOZU_TEST_EQUAL(P, Q); + } + // compressed + Ec::setCompressedExpression(true); + P.set(x, y); + { + cybozu::BitVector bv; + P.appendToBitVec(bv); + Q.fromBitVec(bv); + CYBOZU_TEST_EQUAL(P, Q); + } + { + P = -P; + cybozu::BitVector bv; + P.appendToBitVec(bv); + Q.fromBitVec(bv); + CYBOZU_TEST_EQUAL(P, Q); + } + P.clear(); + { + cybozu::BitVector bv; + P.appendToBitVec(bv); + Q.fromBitVec(bv); + CYBOZU_TEST_EQUAL(P, Q); + } + } + void str() const + { + puts("test str"); + const Fp x(para.gx); + const Fp y(para.gy); + Ec P(x, y); + Ec Q; + // not compressed + Ec::setCompressedExpression(false); + { + std::stringstream ss; + ss << P; + ss >> Q; + CYBOZU_TEST_EQUAL(P, Q); + } + { + P = -P; + std::stringstream ss; + ss << P; + ss >> Q; + CYBOZU_TEST_EQUAL(P, Q); + } + P.clear(); + { + std::stringstream ss; + ss << P; + ss >> Q; + CYBOZU_TEST_EQUAL(P, Q); + } + // compressed + Ec::setCompressedExpression(true); + P.set(x, y); + { + std::stringstream ss; + ss << P; + ss >> Q; + CYBOZU_TEST_EQUAL(P, Q); + } + { + P = -P; + std::stringstream ss; + ss << P; + ss >> Q; + CYBOZU_TEST_EQUAL(P, Q); + } + P.clear(); + { + std::stringstream ss; + ss << P; + ss >> Q; + CYBOZU_TEST_EQUAL(P, Q); + } + } + + template + void test(F f, const char *msg) const + { + const int N = 300000; + Fp x(para.gx); + Fp y(para.gy); + Ec P(x, y); + Ec Q = P + P + P; + clock_t begin = clock(); + for (int i = 0; i < N; i++) { + f(Q, P, Q); + } + clock_t end = clock(); + printf("%s %.2fusec\n", msg, (end - begin) / double(CLOCKS_PER_SEC) / N * 1e6); + } + /* + add 8.71usec -> 6.94 + sub 6.80usec -> 4.84 + dbl 9.59usec -> 7.75 + pos 2730usec -> 2153 + */ + void bench() const + { + Fp x(para.gx); + Fp y(para.gy); + Ec P(x, y); + Ec Q = P + P + P; + CYBOZU_BENCH("add", Ec::add, Q, P, Q); + CYBOZU_BENCH("sub", Ec::sub, Q, P, Q); + CYBOZU_BENCH("dbl", Ec::dbl, P, P); + Zn z("-3"); + CYBOZU_BENCH("pow", Ec::power, P, P, z); + } +/* +Affine : sandy-bridge +add 3.17usec +sub 2.43usec +dbl 3.32usec +pow 905.00usec +Jacobi +add 2.34usec +sub 2.65usec +dbl 1.56usec +pow 499.00usec +*/ + void run() const + { + cstr(); + ope(); + power(); + neg_power(); + power_fp(); + binaryExpression(); + squareRoot(); + str(); +#ifdef NDEBUG + bench(); +#endif + } +private: + Test(const Test&); + void operator=(const Test&); +}; + +template +void test_sub(const mcl::EcParam *para, size_t paraNum) +{ + for (size_t i = 0; i < paraNum; i++) { + puts(para[i].name); + Test(para[i]).run(); + } +} + +int g_partial = -1; + +CYBOZU_TEST_AUTO(all) +{ +#ifdef USE_MONT_FP + puts("use MontFp"); +#else + puts("use GMP"); +#endif + if (g_partial & (1 << 3)) { + const struct mcl::EcParam para3[] = { + // mcl::ecparam::p160_1, + mcl::ecparam::secp160k1, + mcl::ecparam::secp192k1, + mcl::ecparam::NIST_P192, + }; + test_sub(para3, CYBOZU_NUM_OF_ARRAY(para3)); + } + + if (g_partial & (1 << 4)) { + const struct mcl::EcParam para4[] = { + mcl::ecparam::secp224k1, + mcl::ecparam::secp256k1, + mcl::ecparam::NIST_P224, + mcl::ecparam::NIST_P256, + }; + test_sub(para4, CYBOZU_NUM_OF_ARRAY(para4)); + } + + if (g_partial & (1 << 6)) { + const struct mcl::EcParam para6[] = { + // mcl::ecparam::secp384r1, + mcl::ecparam::NIST_P384, + }; + test_sub(para6, CYBOZU_NUM_OF_ARRAY(para6)); + } + + if (g_partial & (1 << 9)) { + const struct mcl::EcParam para9[] = { + // mcl::ecparam::secp521r1, + mcl::ecparam::NIST_P521, + }; + test_sub(para9, CYBOZU_NUM_OF_ARRAY(para9)); + } +} + +int main(int argc, char *argv[]) +{ + if (argc == 1) { + g_partial = -1; + } else { + g_partial = 0; + for (int i = 1; i < argc; i++) { + g_partial |= 1 << atoi(argv[i]); + } + } + return cybozu::test::autoRun.run(argc, argv); +} diff --git a/test/fp_generator_test.cpp b/test/fp_generator_test.cpp new file mode 100644 index 0000000..9a61ab2 --- /dev/null +++ b/test/fp_generator_test.cpp @@ -0,0 +1,222 @@ +#include +#if CYBOZU_OS_BIT == 32 +// not support +#else +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef mcl::FpT<> Fp; + +const int MAX_N = 4; + +const char *primeTable[] = { + "7fffffffffffffffffffffffffffffff", // 127bit(not full) + "ffffffffffffffffffffffffffffff61", // 128bit(full) + "fffffffffffffffffffffffffffffffffffffffeffffee37", // 192bit(full) + "2523648240000001ba344d80000000086121000000000013a700000000000013", // 254bit(not full) +}; + +/* + p is output buffer + pStr is hex + return the size of p +*/ +int convertToArray(uint64_t *p, const mpz_class& x) +{ + const int pn = int(sizeof(mp_limb_t) * x.get_mpz_t()->_mp_size / sizeof(*p)); + if (pn > MAX_N) { + printf("pn(%d) is too large\n", pn); + exit(1); + } + const uint64_t *q = (const uint64_t*)x.get_mpz_t()->_mp_d; + std::copy(q, q + pn, p); + std::fill(p + pn, p + MAX_N, 0); + return pn; +} +int convertToArray(uint64_t *p, const char *pStr) +{ + mpz_class x; + x.set_str(pStr, 16); + return convertToArray(p, x); +} + +struct Int { + int vn; + uint64_t v[MAX_N]; + Int() + : vn(0) + { + } + explicit Int(int vn) + { + if (vn > MAX_N) { + printf("vn(%d) is too large\n", vn); + exit(1); + } + this->vn = vn; + } + void set(const char *str) { fromStr(str); } + void set(const Fp& rhs) + { + convertToArray(v, rhs.toGmp()); + } + void set(const uint64_t* x) + { + for (int i = 0; i < vn; i++) v[i] = x[i]; + } + void fromStr(const char *str) + { + convertToArray(v, str); + } + std::string toStr() const + { + std::string ret; + for (int i = 0; i < vn; i++) { + ret += cybozu::itohex(v[vn - 1 - i], false); + } + return ret; + } + void put(const char *msg = "") const + { + if (msg) printf("%s=", msg); + printf("%s\n", toStr().c_str()); + } + bool operator==(const Int& rhs) const + { + if (vn != rhs.vn) return false; + for (int i = 0; i < vn; i++) { + if (v[i] != rhs.v[i]) return false; + } + return true; + } + bool operator!=(const Int& rhs) const { return !operator==(rhs); } + bool operator==(const Fp& rhs) const + { + Int t(vn); + t.set(rhs); + return operator==(t); + } + bool operator!=(const Fp& rhs) const { return !operator==(rhs); } +}; +static inline std::ostream& operator<<(std::ostream& os, const Int& x) +{ + return os << x.toStr(); +} + +void testAddSub(const mcl::FpGenerator& fg, int pn) +{ + Fp x, y; + Int mx(pn), my(pn); + x.fromStr("0x8811aabb23427cc"); + y.fromStr("0x8811aabb23427cc11"); + mx.set(x); + my.set(y); + for (int i = 0; i < 30; i++) { + CYBOZU_TEST_EQUAL(mx, x); + x += x; + fg.add_(mx.v, mx.v, mx.v); + } + for (int i = 0; i < 30; i++) { + CYBOZU_TEST_EQUAL(mx, x); + x += y; + fg.add_(mx.v, mx.v, my.v); + } + for (int i = 0; i < 30; i++) { + CYBOZU_TEST_EQUAL(my, y); + y -= x; + fg.sub_(my.v, my.v, mx.v); + } +} + +void testNeg(const mcl::FpGenerator& fg, int pn) +{ + Fp x; + Int mx(pn), my(pn); + const char *tbl[] = { + "0", + "0x12346", + "0x11223344556677881122334455667788", + "0x0abbccddeeffaabb0000000000000000", + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + x.fromStr(tbl[i]); + mx.set(x); + x = -x; + fg.neg_(mx.v, mx.v); + CYBOZU_TEST_EQUAL(mx, x); + } +} + +void testMulI(const mcl::FpGenerator& fg, int pn) +{ + cybozu::XorShift rg; + for (int i = 0; i < 100; i++) { + uint64_t x[MAX_N]; + uint64_t z[MAX_N + 1]; + rg.read(x, pn); + uint64_t y = rg.get64(); + mpz_class mx; + mcl::Gmp::setRaw(mx, x, pn); + mpz_class my; + mcl::Gmp::set(my, y); + mx *= my; + uint64_t d = fg.mulI_(z, x, y); + z[pn] = d; + mcl::Gmp::setRaw(my, z, pn + 1); + CYBOZU_TEST_EQUAL(mx, my); + } + { + uint64_t x[MAX_N]; + uint64_t z[MAX_N + 1]; + rg.read(x, pn); + uint64_t y = rg.get64(); + CYBOZU_BENCH_C("mulI", 10000000, fg.mulI_, z, x, y); + } +} + +void testShr1(const mcl::FpGenerator& fg, int pn) +{ + cybozu::XorShift rg; + for (int i = 0; i < 100; i++) { + uint64_t x[MAX_N]; + uint64_t z[MAX_N]; + rg.read(x, pn); + mpz_class mx; + mcl::Gmp::setRaw(mx, x, pn); + mx >>= 1; + fg.shr1_(z, x); + mpz_class my; + mcl::Gmp::setRaw(my, z, pn); + CYBOZU_TEST_EQUAL(mx, my); + } +} + +void test(const char *pStr) +{ + Fp::setModulo(pStr, 16); + uint64_t p[MAX_N]; + const int pn = convertToArray(p, pStr); + printf("pn=%d\n", pn); + mcl::FpGenerator fg; + fg.init(p, pn); + testAddSub(fg, pn); + testNeg(fg, pn); + testMulI(fg, pn); + testShr1(fg, pn); +} + +CYBOZU_TEST_AUTO(all) +{ + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(primeTable); i++) { + printf("test prime i=%d\n", (int)i); + test(primeTable[i]); + } +} +#endif diff --git a/test/fp_test.cpp b/test/fp_test.cpp new file mode 100644 index 0000000..aac80a1 --- /dev/null +++ b/test/fp_test.cpp @@ -0,0 +1,465 @@ +#define PUT(x) std::cout << #x "=" << (x) << std::endl +#include +#include +#include +#include + +#ifdef _MSC_VER + #pragma warning(disable: 4127) // const condition +#endif + +typedef mcl::FpT<> Fp; + +const int m = 65537; +struct Init { + Init() + { + std::ostringstream ms; + ms << m; + Fp::setModulo(ms.str()); + } +}; + +CYBOZU_TEST_SETUP_FIXTURE(Init); + +#ifndef MCL_ONLY_BENCH +CYBOZU_TEST_AUTO(cstr) +{ + const struct { + const char *str; + int val; + } tbl[] = { + { "0", 0 }, + { "1", 1 }, + { "123", 123 }, + { "0x123", 0x123 }, + { "0b10101", 21 }, + { "-123", m - 123 }, + { "-0x123", m - 0x123 }, + { "-0b10101", m - 21 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + // string cstr + Fp x(tbl[i].str); + CYBOZU_TEST_EQUAL(x, tbl[i].val); + + // int cstr + Fp y(tbl[i].val); + CYBOZU_TEST_EQUAL(y, x); + + // copy cstr + Fp z(x); + CYBOZU_TEST_EQUAL(z, x); + + // assign int + Fp w; + w = tbl[i].val; + CYBOZU_TEST_EQUAL(w, x); + + // assign self + Fp u; + u = w; + CYBOZU_TEST_EQUAL(u, x); + + // conv + std::ostringstream os; + os << tbl[i].val; + + std::string str; + x.toStr(str); + CYBOZU_TEST_EQUAL(str, os.str()); + } +} + +CYBOZU_TEST_AUTO(fromStr) +{ + const struct { + const char *in; + int out; + int base; + } tbl[] = { + { "100", 100, 0 }, // set base = 10 if base = 0 + { "100", 4, 2 }, + { "100", 256, 16 }, + { "0b100", 4, 0 }, + { "0b100", 4, 2 }, + { "0x100", 256, 0 }, + { "0x100", 256, 16 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + Fp x; + x.fromStr(tbl[i].in, tbl[i].base); + CYBOZU_TEST_EQUAL(x, tbl[i].out); + } + // conflict prefix with base + Fp x; + CYBOZU_TEST_EXCEPTION(x.fromStr("0b100", 16), cybozu::Exception); + CYBOZU_TEST_EXCEPTION(x.fromStr("0x100", 2), cybozu::Exception); +} + +CYBOZU_TEST_AUTO(stream) +{ + const struct { + const char *in; + int out10; + int out16; + } tbl[] = { + { "100", 100, 256 }, // set base = 10 if base = 0 + { "0x100", 256, 256 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + { + std::istringstream is(tbl[i].in); + Fp x; + is >> x; + CYBOZU_TEST_EQUAL(x, tbl[i].out10); + } + { + std::istringstream is(tbl[i].in); + Fp x; + is >> std::hex >> x; + CYBOZU_TEST_EQUAL(x, tbl[i].out16); + } + } + std::istringstream is("0b100"); + Fp x; + CYBOZU_TEST_EXCEPTION(is >> std::hex >> x, cybozu::Exception); +} + +CYBOZU_TEST_AUTO(conv) +{ + const char *bin = "0b1001000110100"; + const char *hex = "0x1234"; + const char *dec = "4660"; + Fp b(bin); + Fp h(hex); + Fp d(dec); + CYBOZU_TEST_EQUAL(b, h); + CYBOZU_TEST_EQUAL(b, d); + + std::string str; + b.toStr(str, 2, true); + CYBOZU_TEST_EQUAL(str, bin); + b.toStr(str); + CYBOZU_TEST_EQUAL(str, dec); + b.toStr(str, 16, true); + CYBOZU_TEST_EQUAL(str, hex); +} + +CYBOZU_TEST_AUTO(compare) +{ + const struct { + int lhs; + int rhs; + int cmp; + } tbl[] = { + { 0, 0, 0 }, + { 1, 0, 1 }, + { 0, 1, -1 }, + { -1, 0, 1 }, // m-1, 0 + { 0, -1, -1 }, // 0, m-1 + { 123, 456, -1 }, + { 456, 123, 1 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const Fp x(tbl[i].lhs); + const Fp y(tbl[i].rhs); + const int cmp = tbl[i].cmp; + if (cmp == 0) { + CYBOZU_TEST_EQUAL(x, y); + } else { + CYBOZU_TEST_ASSERT(x != y); + } + } + { + Fp x(5); + CYBOZU_TEST_ASSERT(x == 5); + } +} + +CYBOZU_TEST_AUTO(modulo) +{ + std::ostringstream ms; + ms << m; + + std::string str; + Fp::getModulo(str); + CYBOZU_TEST_EQUAL(str, ms.str()); +} + +CYBOZU_TEST_AUTO(ope) +{ + const struct { + int x; + int y; + int add; // x + y + int sub; // x - y + int mul; // x * y + int sqr; // x^2 + } tbl[] = { + { 0, 1, 1, m - 1, 0, 0 }, + { 9, 5, 14, 4, 45, 81 }, + { 10, 13, 23, m - 3, 130, 100 }, + { 2000, 1000, 3000, 1000, (2000 * 1000) % m, (2000 * 2000) % m }, + { 12345, 9999, 12345 + 9999, 12345 - 9999, (12345 * 9999) % m, (12345 * 12345) % m }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const Fp x(tbl[i].x); + const Fp y(tbl[i].y); + Fp z; + Fp::add(z, x, y); + CYBOZU_TEST_EQUAL(z, tbl[i].add); + Fp::sub(z, x, y); + CYBOZU_TEST_EQUAL(z, tbl[i].sub); + Fp::mul(z, x, y); + CYBOZU_TEST_EQUAL(z, tbl[i].mul); + + Fp r; + Fp::inv(r, y); + Fp::mul(z, z, r); + CYBOZU_TEST_EQUAL(z, tbl[i].x); + z = x + y; + CYBOZU_TEST_EQUAL(z, tbl[i].add); + z = x - y; + CYBOZU_TEST_EQUAL(z, tbl[i].sub); + z = x * y; + CYBOZU_TEST_EQUAL(z, tbl[i].mul); + + Fp::square(z, x); + CYBOZU_TEST_EQUAL(z, tbl[i].sqr); + + z = x / y; + z *= y; + CYBOZU_TEST_EQUAL(z, tbl[i].x); + } +} + +struct tag2; + +CYBOZU_TEST_AUTO(power) +{ + Fp x, y, z; + x = 12345; + z = 1; + for (int i = 0; i < 100; i++) { + Fp::power(y, x, i); + CYBOZU_TEST_EQUAL(y, z); + z *= x; + } + typedef mcl::FpT Fp2; + Fp2::setModulo("1009"); + x = 5; + Fp2 n = 3; + z = 3; + Fp::power(x, x, z); + CYBOZU_TEST_EQUAL(x, 125); + x = 5; + Fp::power(x, x, n); + CYBOZU_TEST_EQUAL(x, 125); +} + +CYBOZU_TEST_AUTO(power_fp) +{ + Fp x, y, z; + x = 12345; + z = 1; + for (int i = 0; i < 100; i++) { + Fp::power(y, x, Fp(i)); + CYBOZU_TEST_EQUAL(y, z); + z *= x; + } +} + +struct TagAnother; + +CYBOZU_TEST_AUTO(another) +{ + typedef mcl::FpT G; + G::setModulo("13"); + G a = 3; + G b = 9; + a *= b; + CYBOZU_TEST_EQUAL(a, 1); +} + + +CYBOZU_TEST_AUTO(setRaw) +{ + Fp::setModulo("1000000000000000000117"); + char b1[] = { 0x56, 0x34, 0x12 }; + Fp x; + x.setRaw(b1, 3); + CYBOZU_TEST_EQUAL(x, 0x123456); + int b2[] = { 0x12, 0x34 }; + x.setRaw(b2, 2); + CYBOZU_TEST_EQUAL(x, Fp("0x3400000012")); + x.fromStr("0xffffffffffff"); + + Fp::setModulo("0x10000000000001234567a5"); + const struct { + uint32_t buf[3]; + size_t bufN; + const char *expected; + } tbl[] = { + { { 0x234567a4, 0x00000001, 0x00100000}, 1, "0x234567a4" }, + { { 0x234567a4, 0x00000001, 0x00100000}, 2, "0x1234567a4" }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + x.setRaw(tbl[i].buf, tbl[i].bufN); + CYBOZU_TEST_EQUAL(x, Fp(tbl[i].expected)); + } + uint32_t large[3] = { 0x234567a5, 0x00000001, 0x00100000}; + CYBOZU_TEST_EXCEPTION(x.setRaw(large, 3), cybozu::Exception); +} + + +CYBOZU_TEST_AUTO(set64bit) +{ + Fp::setModulo("0x1000000000000000000f"); + const struct { + const char *p; + int64_t i; + } tbl[] = { + { "0x1234567812345678", int64_t(0x1234567812345678ull) }, + { "0xfffedcba987edcba997", -int64_t(0x1234567812345678ull) }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + Fp x(tbl[i].p); + Fp y(tbl[i].i); + CYBOZU_TEST_EQUAL(x, y); + } +} + +CYBOZU_TEST_AUTO(getRaw) +{ + const struct { + const char *s; + uint32_t v[4]; + size_t vn; + } tbl[] = { + { "0", { 0, 0, 0, 0 }, 1 }, + { "1234", { 1234, 0, 0, 0 }, 1 }, + { "0xaabbccdd12345678", { 0x12345678, 0xaabbccdd, 0, 0 }, 2 }, + { "0x11112222333344445555666677778888", { 0x77778888, 0x55556666, 0x33334444, 0x11112222 }, 4 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + mpz_class x(tbl[i].s); + const size_t bufN = 8; + uint32_t buf[bufN]; + size_t n = mcl::Gmp::getRaw(buf, bufN, x); + CYBOZU_TEST_EQUAL(n, tbl[i].vn); + CYBOZU_TEST_EQUAL_ARRAY(buf, tbl[i].v, n); + } +} + +CYBOZU_TEST_AUTO(toStr) +{ + const char *tbl[] = { + "0x0", + "0x5", + "0x123", + "0x123456789012345679adbc", + "0xffffffff26f2fc170f69466a74defd8d", + "0x100000000000000000000000000000033", + "0x11ee12312312940000000000000000000000000002342343" + }; + Fp::setModulo("0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d"); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + mpz_class x(tbl[i]); + Fp y(tbl[i]); + std::string xs, ys; + mcl::Gmp::toStr(xs, x, 16); + y.toStr(ys, 16); + CYBOZU_TEST_EQUAL(xs, ys); + } +} + +CYBOZU_TEST_AUTO(binaryRepl) +{ + const struct { + const char *s; + size_t n; + uint32_t v[6]; + } tbl[] = { + { "0", 0, { 0, 0, 0, 0, 0 } }, + { "1234", 1, { 1234, 0, 0, 0, 0 } }, + { "0xaabbccdd12345678", 2, { 0x12345678, 0xaabbccdd, 0, 0, 0 } }, + { "0x11112222333344445555666677778888", 4, { 0x77778888, 0x55556666, 0x33334444, 0x11112222, 0 } }, + { "0x9911112222333344445555666677778888", 5, { 0x77778888, 0x55556666, 0x33334444, 0x11112222, 0x99, 0 } }, + }; + Fp::setModulo("0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d"); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + Fp x(tbl[i].s); + cybozu::BitVector bv; + x.appendToBitVec(bv); + CYBOZU_TEST_EQUAL(bv.size(), Fp::getModBitLen()); + CYBOZU_TEST_EQUAL(bv.size(), Fp::getBitVecSize()); + const Fp::BlockType *block = bv.getBlock(); + if (sizeof(Fp::BlockType) == 4) { + CYBOZU_TEST_EQUAL_ARRAY(block, tbl[i].v, tbl[i].n); + } else { + const size_t n = (tbl[i].n + 1) / 2; + for (size_t j = 0; j < n; j++) { + uint64_t v = (uint64_t(tbl[i].v[j * 2 + 1]) << 32) | tbl[i].v[j * 2]; + CYBOZU_TEST_EQUAL(block[j], v); + } + } + Fp y; + y.fromBitVec(bv); + CYBOZU_TEST_EQUAL(x, y); + } +} +#endif + +#ifdef NDEBUG +void benchSub(const char *pStr, const char *xStr, const char *yStr) + try +{ + Fp::setModulo(pStr); + Fp x(xStr); + Fp y(yStr); + + CYBOZU_BENCH("add", Fp::add, x, x, x); + CYBOZU_BENCH("sub", Fp::sub, x, x, y); + CYBOZU_BENCH("mul", Fp::mul, x, x, x); + CYBOZU_BENCH("square", Fp::square, x, x); + CYBOZU_BENCH("inv", x += y;Fp::inv, x, x); // avoid same jmp + CYBOZU_BENCH("div", x += y;Fp::div, x, y, x); + puts(""); +} catch (std::exception& e) { + printf("ERR %s\n", e.what()); +} + +// square 76clk@sandy +CYBOZU_TEST_AUTO(bench3) +{ + const char *pStr = "0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d"; + const char *xStr = "0x148094810948190412345678901234567900342423332197"; + const char *yStr = "0x7fffffffffffffffffffffe26f2fc170f69466a74defd8d"; + benchSub(pStr, xStr, yStr); +} + +CYBOZU_TEST_AUTO(bench4) +{ + const char *pStr = "0x2523648240000001ba344d80000000086121000000000013a700000000000013"; + const char *xStr = "0x1480948109481904123456789234234242423424201234567900342423332197"; + const char *yStr = "0x151342342342341517fffffffffffffffffffffe26f2fc170f69466a74defd8d"; + benchSub(pStr, xStr, yStr); +} + +CYBOZU_TEST_AUTO(bench6) +{ + const char *pStr = "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff"; + const char *xStr = "0x19481084109481094820948209482094820984290482212345678901234567900342308472047204720422423332197"; + const char *yStr = "0x209348209481094820984209842094820948204204243123456789012345679003423084720472047204224233321972"; + benchSub(pStr, xStr, yStr); +} + +CYBOZU_TEST_AUTO(bench9) +{ + const char *pStr = "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"; + const char *xStr = "0x2908209582095820941098410948109482094820984209840294829049240294242498540975555312345678901234567900342308472047204720422423332197"; + const char *yStr = "0x3948384209834029834092384204920349820948205872380573205782385729385729385723985837ffffffffffffffffffffffe26f2fc170f69466a74defd8d"; + benchSub(pStr, xStr, yStr); +} +#endif diff --git a/test/fp_util_test.cpp b/test/fp_util_test.cpp new file mode 100644 index 0000000..28d94ed --- /dev/null +++ b/test/fp_util_test.cpp @@ -0,0 +1,191 @@ +#define PUT(x) std::cout << #x "=" << (x) << std::endl +#include +#include + +CYBOZU_TEST_AUTO(toStr16) +{ + const struct { + uint32_t x[4]; + size_t n; + const char *str; + } tbl[] = { + { { 0, 0, 0, 0 }, 0, "0" }, + { { 0x123, 0, 0, 0 }, 1, "123" }, + { { 0x12345678, 0xaabbcc, 0, 0 }, 2, "aabbcc12345678" }, + { { 0, 0x12, 0x234a, 0 }, 3, "234a0000001200000000" }, + { { 1, 2, 0xffffffff, 0x123abc }, 4, "123abcffffffff0000000200000001" }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + std::string str; + mcl::fp::toStr16(str, tbl[i].x, tbl[i].n, false); + CYBOZU_TEST_EQUAL(str, tbl[i].str); + mcl::fp::toStr16(str, tbl[i].x, tbl[i].n, true); + CYBOZU_TEST_EQUAL(str, std::string("0x") + tbl[i].str); + } +} + +// CYBOZU_TEST_AUTO(toStr2) // QQQ +// CYBOZU_TEST_AUTO(verifyStr) // QQQ + +CYBOZU_TEST_AUTO(fromStr16) +{ + const struct { + const char *str; + uint64_t x[4]; + } tbl[] = { + { "0", { 0, 0, 0, 0 } }, + { "5", { 5, 0, 0, 0 } }, + { "123", { 0x123, 0, 0, 0 } }, + { "123456789012345679adbc", { uint64_t(0x789012345679adbcull), 0x123456, 0, 0 } }, + { "ffffffff26f2fc170f69466a74defd8d", { uint64_t(0x0f69466a74defd8dull), uint64_t(0xffffffff26f2fc17ull), 0, 0 } }, + { "100000000000000000000000000000033", { uint64_t(0x0000000000000033ull), 0, 1, 0 } }, + { "11ee12312312940000000000000000000000000002342343", { uint64_t(0x0000000002342343ull), uint64_t(0x0000000000000000ull), uint64_t(0x11ee123123129400ull), 0 } }, + { "1234567890abcdefABCDEF123456789aba32134723424242424", { uint64_t(0x2134723424242424ull), uint64_t(0xDEF123456789aba3ull), uint64_t(0x4567890abcdefABCull), 0x123 } }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const size_t xN = 4; + uint64_t x[xN]; + mcl::fp::fromStr16(x, xN, tbl[i].str, strlen(tbl[i].str)); + for (size_t j = 0; j < xN; j++) { + CYBOZU_TEST_EQUAL(x[j], tbl[i].x[j]); + } + } +} + +CYBOZU_TEST_AUTO(compareArray) +{ + const struct { + uint32_t a[4]; + uint32_t b[4]; + size_t n; + int expect; + } tbl[] = { + { { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, 0, 0 }, + { { 1, 0, 0, 0 }, { 0, 0, 0, 0 }, 1, 1 }, + { { 0, 0, 0, 0 }, { 1, 0, 0, 0 }, 1, -1 }, + { { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, 1, 0 }, + { { 3, 1, 1, 0 }, { 2, 1, 1, 0 }, 4, 1 }, + { { 9, 2, 1, 1 }, { 1, 3, 1, 1 }, 4, -1 }, + { { 1, 7, 8, 4 }, { 1, 7, 8, 9 }, 3, 0 }, + { { 1, 7, 8, 4 }, { 1, 7, 8, 9 }, 4, -1 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + int e = mcl::fp::compareArray(tbl[i].a, tbl[i].b, tbl[i].n); + CYBOZU_TEST_EQUAL(e, tbl[i].expect); + } +} + +struct Rand { + std::vector v; + size_t pos; + int count; + void read(uint32_t *x, size_t n) + { + if (v.size() < pos + n) throw cybozu::Exception("Rand:get:bad n") << v.size() << pos << n; + std::copy(v.begin() + pos, v.begin() + pos + n, x); + pos += n; + count++; + } + Rand(const uint32_t *x, size_t n) + : pos(0) + , count(0) + { + for (size_t i = 0; i < n; i++) { + v.push_back(x[i]); + } + } +}; + +CYBOZU_TEST_AUTO(getRandVal) +{ + const size_t rn = 8; + const struct { + uint32_t r[rn]; + uint32_t mod[2]; + size_t bitLen; + int count; + uint32_t expect[2]; + } tbl[] = { + { { 1, 2, 3, 4, 5, 6, 7, 8 }, { 5, 6 }, 64, 1, { 1, 2 } }, + { { 0xfffffffc, 0x7, 3, 4, 5, 6, 7, 8 }, { 0xfffffffe, 0x3 }, 34, 1, { 0xfffffffc, 0x3 } }, + { { 0xfffffffc, 0x7, 3, 4, 5, 6, 7, 8 }, { 0xfffffffb, 0x3 }, 34, 2, { 3, 0 } }, + { { 2, 3, 5, 7, 4, 3, 0, 3 }, { 1, 0x3 }, 34, 4, { 0, 3 } }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + Rand rg(tbl[i].r, rn); + uint32_t out[2]; + mcl::fp::getRandVal(out, rg, tbl[i].mod, tbl[i].bitLen); + CYBOZU_TEST_EQUAL(out[0], tbl[i].expect[0]); + CYBOZU_TEST_EQUAL(out[1], tbl[i].expect[1]); + CYBOZU_TEST_EQUAL(rg.count, tbl[i].count); + } +} + +CYBOZU_TEST_AUTO(shiftLeftOr) +{ + const struct { + uint32_t x[4]; + size_t n; + size_t shift; + uint32_t y; + uint32_t z[4]; + uint32_t ret; + } tbl[] = { + { { 0x12345678, 0, 0, 0 }, 1, 0, 0, { 0x12345678, 0, 0, 0 }, 0 }, + { { 0x12345678, 0, 0, 0 }, 1, 1, 0, { 0x2468acf0, 0, 0, 0 }, 0 }, + { { 0xf2345678, 0, 0, 0 }, 1, 1, 5, { 0xe468acf5, 0, 0, 0 }, 1 }, + { { 0x12345678, 0x9abcdef0, 0x11112222, 0xffccaaee }, 4, 19, 0x1234, { 0xb3c01234, 0xf78091a2, 0x1114d5e6, 0x57708889 }, 0x7fe65 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + uint32_t z[4]; + uint32_t ret = mcl::fp::shiftLeftOr(z, tbl[i].x, tbl[i].n, tbl[i].shift, tbl[i].y); + CYBOZU_TEST_EQUAL_ARRAY(z, tbl[i].z, tbl[i].n); + CYBOZU_TEST_EQUAL(ret, tbl[i].ret); + } +} + +CYBOZU_TEST_AUTO(shiftRight) +{ + const struct { + uint32_t x[4]; + size_t n; + size_t shift; + uint32_t z[4]; + } tbl[] = { + { { 0x12345678, 0, 0, 0 }, 4, 0, { 0x12345678, 0, 0, 0 } }, + { { 0x12345678, 0xaaaabbbb, 0xffeebbcc, 0xfeba9874 }, 4, 1, { 0x891a2b3c, 0x55555ddd, 0x7ff75de6, 0x7f5d4c3a } }, + { { 0x12345678, 0xaaaabbbb, 0xffeebbcc, 0xfeba9874 }, 4, 18, { 0xaeeec48d, 0xaef32aaa, 0xa61d3ffb, 0x3fae } }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + uint32_t z[4]; + mcl::fp::shiftRight(z, tbl[i].x, tbl[i].n, tbl[i].shift); + CYBOZU_TEST_EQUAL_ARRAY(z, tbl[i].z, tbl[i].n); + } +} + +CYBOZU_TEST_AUTO(splitBitVec) +{ + uint32_t tbl[] = { 0x12345678, 0xaaaabbbb, 0xffeebbcc }; + typedef cybozu::BitVectorT BitVec; + typedef std::vector IntVec; + BitVec bv; + bv.append(tbl, sizeof(tbl) * 8); + for (size_t len = bv.size(); len > 0; len--) { + bv.resize(len); + for (size_t w = 1; w < 18; w++) { + IntVec iv; + size_t last = mcl::fp::splitBitVec(iv, bv, w); + size_t q = len / w; + size_t r = len % w; + if (r == 0) { + r = w; + } else { + q++; + } + CYBOZU_TEST_EQUAL(iv.size(), q); + BitVec bv2; + mcl::fp::concatBitVec(bv2, iv, w, last); + CYBOZU_TEST_ASSERT(bv == bv2); + } + } +} diff --git a/test/mk32.sh b/test/mk32.sh new file mode 100644 index 0000000..4d5f607 --- /dev/null +++ b/test/mk32.sh @@ -0,0 +1 @@ +g++ -O3 -march=native base_test.cpp ../src/x86.s -m32 -I ~/32/include/ -I ../include/ -I ../../xbyak/ -I ../../cybozulib/include ~/32/lib/libgmp.a ~/32/lib/libgmpxx.a -I ~/32/lib -DNDEBUG diff --git a/test/mont_fp_test.cpp b/test/mont_fp_test.cpp new file mode 100644 index 0000000..6e13e4d --- /dev/null +++ b/test/mont_fp_test.cpp @@ -0,0 +1,809 @@ +#define PUT(x) std::cout << #x "=" << (x) << std::endl +#include +#include +#include + +#define USE_MONT_FP +#include +typedef mcl::FpT<> Zn; +typedef mcl::FpT<> MontFp3; +typedef mcl::FpT<> MontFp4; +typedef mcl::FpT<> MontFp6; +typedef mcl::FpT<> MontFp9; + +struct Montgomery { + typedef mcl::Gmp::BlockType BlockType; + mpz_class p_; + mpz_class R_; // (1 << (pn_ * 64)) % p + mpz_class RR_; // (R * R) % p + BlockType pp_; // p * pp = -1 mod M = 1 << 64 + size_t pn_; + Montgomery() {} + explicit Montgomery(const mpz_class& p) + { + p_ = p; + pp_ = mcl::montgomery::getCoff(mcl::Gmp::getBlock(p, 0)); + pn_ = mcl::Gmp::getBlockSize(p); + R_ = 1; + R_ = (R_ << (pn_ * 64)) % p_; + RR_ = (R_ * R_) % p_; + } + + void toMont(mpz_class& x) const { mul(x, x, RR_); } + void fromMont(mpz_class& x) const { mul(x, x, 1); } + + void mul(mpz_class& z, const mpz_class& x, const mpz_class& y) const + { +#if 0 + const size_t ySize = mcl::Gmp::getBlockSize(y); + mpz_class c = x * mcl::Gmp::getBlock(y, 0); + BlockType q = mcl::Gmp::getBlock(c, 0) * pp_; + c += p_ * q; + c >>= sizeof(BlockType) * 8; + for (size_t i = 1; i < pn_; i++) { + if (i < ySize) { + c += x * mcl::Gmp::getBlock(y, i); + } + BlockType q = mcl::Gmp::getBlock(c, 0) * pp_; + c += p_ * q; + c >>= sizeof(BlockType) * 8; + } + if (c >= p_) { + c -= p_; + } + z = c; +#else + z = x * y; + for (size_t i = 0; i < pn_; i++) { + BlockType q = mcl::Gmp::getBlock(z, 0) * pp_; + z += p_ * (mp_limb_t)q; + z >>= sizeof(BlockType) * 8; + } + if (z >= p_) { + z -= p_; + } +#endif + } +}; + +template +mpz_class toGmp(const T& x) +{ + std::string str = x.toStr(); + mpz_class t; + mcl::Gmp::fromStr(t, str); + return t; +} + +template +std::string toStr(const T& x) +{ + std::ostringstream os; + os << x; + return os.str(); +} + +template +T castTo(const U& x) +{ + T t; + t.fromStr(toStr(x)); + return t; +} + +template +void putRaw(const T& x) +{ + const uint64_t *p = x.getInnerValue(); + for (size_t i = 0, n = T::BlockSize; i < n; i++) { + printf("%016llx", p[n - 1 - i]); + } + printf("\n"); +} + +template +void put(const uint64_t (&x)[N]) +{ + for (size_t i = 0; i < N; i++) { + printf("%016llx", x[N - 1 - i]); + } + printf("\n"); +} + +template +struct Test { + typedef mcl::FpT<> Fp; + mpz_class m; + void run(const char *p) + { + Fp::setModulo(p); + m = p; + Zn::setModulo(p); + edge(); + cstr(); + toStr(); + fromStr(); + stream(); + conv(); + compare(); + modulo(); + ope(); + cvtInt(); + power(); + neg_power(); + power_Zn(); + setRaw(); + set64bit(); + getRaw(); + binaryExp(); + bench(); + } + void cstr() + { + const struct { + const char *str; + int val; + } tbl[] = { + { "0", 0 }, + { "1", 1 }, + { "123", 123 }, + { "0x123", 0x123 }, + { "0b10101", 21 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + // string cstr + Fp x(tbl[i].str); + CYBOZU_TEST_EQUAL(x, tbl[i].val); + + // int cstr + Fp y(tbl[i].val); + CYBOZU_TEST_EQUAL(y, x); + + // copy cstr + Fp z(x); + CYBOZU_TEST_EQUAL(z, x); + + // assign int + Fp w; + w = tbl[i].val; + CYBOZU_TEST_EQUAL(w, x); + + // assign self + Fp u; + u = w; + CYBOZU_TEST_EQUAL(u, x); + + // conv + std::ostringstream os; + os << tbl[i].val; + + std::string str; + x.toStr(str); + CYBOZU_TEST_EQUAL(str, os.str()); + } + const struct { + const char *str; + int val; + } tbl2[] = { + { "-123", 123 }, + { "-0x123", 0x123 }, + { "-0b10101", 21 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl2); i++) { + Fp x(tbl2[i].str); + x = -x; + CYBOZU_TEST_EQUAL(x, tbl2[i].val); + } + } + void toStr() + { + Fp x(0); + std::string str; + str = x.toStr(); + CYBOZU_TEST_EQUAL(str, "0"); + str = x.toStr(2, true); + CYBOZU_TEST_EQUAL(str, "0"); + str = x.toStr(2, false); + CYBOZU_TEST_EQUAL(str, "0"); + str = x.toStr(16, true); + CYBOZU_TEST_EQUAL(str, "0"); + str = x.toStr(16, false); + CYBOZU_TEST_EQUAL(str, "0"); + + x = 123; + str = x.toStr(); + CYBOZU_TEST_EQUAL(str, "123"); + str = x.toStr(2, true); + CYBOZU_TEST_EQUAL(str, "0b1111011"); + str = x.toStr(2, false); + CYBOZU_TEST_EQUAL(str, "1111011"); + str = x.toStr(16, true); + CYBOZU_TEST_EQUAL(str, "0x7b"); + str = x.toStr(16, false); + CYBOZU_TEST_EQUAL(str, "7b"); + + { + std::ostringstream os; + os << x; + CYBOZU_TEST_EQUAL(os.str(), "123"); + } + { + std::ostringstream os; + os << std::hex << std::showbase << x; + CYBOZU_TEST_EQUAL(os.str(), "0x7b"); + } + { + std::ostringstream os; + os << std::hex << x; + CYBOZU_TEST_EQUAL(os.str(), "7b"); + } + } + + void fromStr() + { + const struct { + const char *in; + int out; + int base; + } tbl[] = { + { "100", 100, 0 }, // set base = 10 if base = 0 + { "100", 4, 2 }, + { "100", 256, 16 }, + { "0b100", 4, 0 }, + { "0b100", 4, 2 }, + { "0x100", 256, 0 }, + { "0x100", 256, 16 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + Fp x; + x.fromStr(tbl[i].in, tbl[i].base); + CYBOZU_TEST_EQUAL(x, tbl[i].out); + } + // conflict prefix with base + Fp x; + CYBOZU_TEST_EXCEPTION(x.fromStr("0b100", 16), cybozu::Exception); + CYBOZU_TEST_EXCEPTION(x.fromStr("0x100", 2), cybozu::Exception); + } + + void stream() + { + const struct { + const char *in; + int out10; + int out16; + } tbl[] = { + { "100", 100, 256 }, // set base = 10 if base = 0 + { "0x100", 256, 256 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + { + std::istringstream is(tbl[i].in); + Fp x; + is >> x; + CYBOZU_TEST_EQUAL(x, tbl[i].out10); + } + { + std::istringstream is(tbl[i].in); + Fp x; + is >> std::hex >> x; + CYBOZU_TEST_EQUAL(x, tbl[i].out16); + } + } + std::istringstream is("0b100"); + Fp x; + CYBOZU_TEST_EXCEPTION(is >> std::hex >> x, cybozu::Exception); + } + void edge() + { +#if 0 + std::cout << std::hex; + /* + real mont + 0 0 + 1 R^-1 + R 1 + -1 -R^-1 + -R -1 + */ + mpz_class t = 1; + const mpz_class R = (t << (N * 64)) % m; + const mpz_class tbl[] = { + 0, 1, R, m - 1, m - R + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const mpz_class& x = tbl[i]; + for (size_t j = i; j < CYBOZU_NUM_OF_ARRAY(tbl); j++) { + const mpz_class& y = tbl[j]; + mpz_class z = (x * y) % m; + Fp xx, yy; + Fp::toMont(xx, x); + Fp::toMont(yy, y); + Fp zz = xx * yy; + mpz_class t; + Fp::fromMont(t, zz); + CYBOZU_TEST_EQUAL(z, t); + } + } + std::cout << std::dec; +#endif + } + + void conv() + { + const char *bin = "0b100100011010001010110011110001001000000010010001101000101011001111000100100000001001000110100010101100111100010010000"; + const char *hex = "0x123456789012345678901234567890"; + const char *dec = "94522879687365475552814062743484560"; + Fp b(bin); + Fp h(hex); + Fp d(dec); + CYBOZU_TEST_EQUAL(b, h); + CYBOZU_TEST_EQUAL(b, d); + + std::string str; + b.toStr(str, 2, true); + CYBOZU_TEST_EQUAL(str, bin); + b.toStr(str); + CYBOZU_TEST_EQUAL(str, dec); + b.toStr(str, 16, true); + CYBOZU_TEST_EQUAL(str, hex); + } + + void compare() + { + const struct { + int lhs; + int rhs; + int cmp; + } tbl[] = { + { 0, 0, 0 }, + { 1, 0, 1 }, + { 0, 1, -1 }, + { -1, 0, 1 }, // m-1, 0 + { 0, -1, -1 }, // 0, m-1 + { 123, 456, -1 }, + { 456, 123, 1 }, + { 5, 5, 0 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const Fp x(tbl[i].lhs); + const Fp y(tbl[i].rhs); + const int cmp = tbl[i].cmp; + if (cmp == 0) { + CYBOZU_TEST_EQUAL(x, y); + } else { + CYBOZU_TEST_ASSERT(x != y); + } + } + } + + void modulo() + { + std::ostringstream ms; + ms << m; + + std::string str; + Fp::getModulo(str); + CYBOZU_TEST_EQUAL(str, ms.str()); + } + + void ope() + { + const struct { + Zn x; + Zn y; + Zn add; // x + y + Zn sub; // x - y + Zn mul; // x * y + Zn sqr; // x * x + } tbl[] = { + { 0, 1, 1, -1, 0, 0 }, + { 9, 7, 16, 2, 63, 81 }, + { 10, 13, 23, -3, 130, 100 }, + { 2000, -1000, 1000, 3000, -2000000, 4000000 }, + { -12345, -9999, -(12345 + 9999), - 12345 + 9999, 12345 * 9999, 12345 * 12345 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const Fp x(castTo(tbl[i].x)); + const Fp y(castTo(tbl[i].y)); + Fp z; + Fp::add(z, x, y); + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].add)); + Fp::sub(z, x, y); + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].sub)); + Fp::mul(z, x, y); + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].mul)); + + Fp r; + Fp::inv(r, y); + Zn rr = 1 / tbl[i].y; + CYBOZU_TEST_EQUAL(r, castTo(rr)); + Fp::mul(z, z, r); + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].x)); + + z = x + y; + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].add)); + z = x - y; + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].sub)); + z = x * y; + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].mul)); + Fp::square(z, x); + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].sqr)); + + z = x / y; + z *= y; + CYBOZU_TEST_EQUAL(z, castTo(tbl[i].x)); + } + } + void cvtInt() + { +#if 0 + Fp x; + x = 12345; + uint64_t y = x.cvtInt(); + CYBOZU_TEST_EQUAL(y, 12345u); + x.fromStr("123456789012342342342342342"); + CYBOZU_TEST_EXCEPTION(x.cvtInt(), cybozu::Exception); + bool err = false; + CYBOZU_TEST_NO_EXCEPTION(x.cvtInt(&err)); + CYBOZU_TEST_ASSERT(err); +#endif + } + + void power() + { + Fp x, y, z; + x = 12345; + z = 1; + for (int i = 0; i < 100; i++) { + Fp::power(y, x, i); + CYBOZU_TEST_EQUAL(y, z); + z *= x; + } + } + + void neg_power() + { + Fp x, y, z; + x = 12345; + z = 1; + Fp rx = 1 / x; + for (int i = 0; i < 100; i++) { + Fp::power(y, x, -i); + CYBOZU_TEST_EQUAL(y, z); + z *= rx; + } + } + + void power_Zn() + { + Fp x, y, z; + x = 12345; + z = 1; + for (int i = 0; i < 100; i++) { + Fp::power(y, x, Zn(i)); + CYBOZU_TEST_EQUAL(y, z); + z *= x; + } + } + + void setRaw() + { + // QQQ +#if 0 + char b1[] = { 0x56, 0x34, 0x12 }; + Fp x; + x.setRaw(b1, 3); + CYBOZU_TEST_EQUAL(x, 0x123456); + int b2[] = { 0x12, 0x34 }; + x.setRaw(b2, 2); + CYBOZU_TEST_EQUAL(x, Fp("0x3400000012")); +#endif + } + void binaryExp() + { + puts("binaryExp"); + for (int i = 2; i < 7; i++) { + mpz_class g = m / i; + Fp x, y; +// Fp::toMont(x, g); + x.fromGmp(g); + cybozu::BitVector bv; + x.appendToBitVec(bv); + uint64_t buf[N]; + mcl::Gmp::getRaw(buf, N, g); + CYBOZU_TEST_EQUAL(bv.getBlockSize(), N); + CYBOZU_TEST_EQUAL(bv.size(), Fp::getModBitLen()); + CYBOZU_TEST_EQUAL(bv.size(), Fp::getBitVecSize()); + const uint64_t *p = bv.getBlock(); + CYBOZU_TEST_EQUAL_ARRAY(p, buf, N); + } + const mpz_class yy("0x1255556666777788881111222233334444"); + if (yy > m) { + return; + } + Fp y; +// Fp::toMont(y, yy); + y.fromGmp(yy); + uint64_t b1[N] = { uint64_t(0x1111222233334444ull), uint64_t(0x5555666677778888ull), 0x12 }; + Fp x; + cybozu::BitVector bv; + bv.append(b1, Fp::getModBitLen()); + x.fromBitVec(bv); + CYBOZU_TEST_EQUAL(x, y); + bv.clear(); + x.appendToBitVec(bv); + const uint64_t *b2 = bv.getBlock(); + CYBOZU_TEST_EQUAL_ARRAY(b1, b2, N); + } + + void set64bit() + { + const struct { + const char *p; + uint64_t i; + } tbl[] = { + { "0x1234567812345678", uint64_t(0x1234567812345678ull) }, + { "0xaaaaaaaaaaaaaaaa", uint64_t(0xaaaaaaaaaaaaaaaaull) }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + Fp x(tbl[i].p); + Fp y(tbl[i].i); + CYBOZU_TEST_EQUAL(x, y); + } + } + + void getRaw() + { + const struct { + const char *s; + uint32_t v[4]; + size_t vn; + } tbl[] = { + { "0", { 0, 0, 0, 0 }, 1 }, + { "1234", { 1234, 0, 0, 0 }, 1 }, + { "0xaabbccdd12345678", { 0x12345678, 0xaabbccdd, 0, 0 }, 2 }, + { "0x11112222333344445555666677778888", { 0x77778888, 0x55556666, 0x33334444, 0x11112222 }, 4 }, + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + mpz_class x(tbl[i].s); + const size_t bufN = 8; + uint32_t buf[bufN]; + size_t n = mcl::Gmp::getRaw(buf, bufN, x); + CYBOZU_TEST_EQUAL(n, tbl[i].vn); + for (size_t j = 0; j < n; j++) { + CYBOZU_TEST_EQUAL(buf[j], tbl[i].v[j]); + } + } + } + void bench() + { + Fp x("-123456789"); + Fp y("-0x7ffffffff"); + CYBOZU_BENCH("add", operator+, x, x); + CYBOZU_BENCH("sub", operator-, x, y); + CYBOZU_BENCH("mul", operator*, x, x); + CYBOZU_BENCH("sqr", Fp::square, x, x); + CYBOZU_BENCH("div", y += x; operator/, x, y); + } +}; + +void customTest(const char *pStr, const char *xStr, const char *yStr) +{ +#if 0 + { + pStr = "0xfffffffffffffffffffffffffffffffffffffffeffffee37", + MontFp3::setModulo(pStr); + static uint64_t x[3] = { 1, 0, 0 }; + uint64_t z[3]; +std::cout< test; + const char *tbl[] = { + "0x000000000000000100000000000000000000000000000033", // min prime + "0x00000000fffffffffffffffffffffffffffffffeffffac73", + "0x0000000100000000000000000001b8fa16dfab9aca16b6b3", + "0x000000010000000000000000000000000000000000000007", + "0x30000000000000000000000000000000000000000000002b", + "0x70000000000000000000000000000000000000000000001f", + "0x800000000000000000000000000000000000000000000005", + "0xfffffffffffffffffffffffffffffffffffffffeffffee37", + "0xfffffffffffffffffffffffe26f2fc170f69466a74defd8d", + "0xffffffffffffffffffffffffffffffffffffffffffffff13", // max prime + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + printf("prime=%s\n", tbl[i]); + test.run(tbl[i]); + } +} + +CYBOZU_TEST_AUTO(test4) +{ + Test<4> test; + const char *tbl[] = { + "0x0000000000000001000000000000000000000000000000000000000000000085", // min prime + "0x2523648240000001ba344d80000000086121000000000013a700000000000013", + "0x7523648240000001ba344d80000000086121000000000013a700000000000017", + "0x800000000000000000000000000000000000000000000000000000000000005f", + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff43", // max prime + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + printf("prime=%s\n", tbl[i]); + test.run(tbl[i]); + } +} + +CYBOZU_TEST_AUTO(test6) +{ + Test<6> test; + const char *tbl[] = { + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff", + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + printf("prime=%s\n", tbl[i]); + test.run(tbl[i]); + } +} + +CYBOZU_TEST_AUTO(test9) +{ + Test<9> test; + const char *tbl[] = { + "0x1ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + }; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + printf("prime=%s\n", tbl[i]); + test.run(tbl[i]); + } +} + +CYBOZU_TEST_AUTO(toStr16) +{ + const char *tbl[] = { + "0x0", + "0x5", + "0x123", + "0x123456789012345679adbc", + "0xffffffff26f2fc170f69466a74defd8d", + "0x100000000000000000000000000000033", + "0x11ee12312312940000000000000000000000000002342343" + }; + MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13"); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + std::string str, str2; + MontFp3 x(tbl[i]); + x.toStr(str, 16); + mpz_class y(tbl[i]); + mcl::Gmp::toStr(str2, y, 16); + CYBOZU_TEST_EQUAL(str, str2); + } +} + +#if 0 +CYBOZU_TEST_AUTO(toStr16bench) +{ + const char *tbl[] = { + "0x0", + "0x5", + "0x123", + "0x123456789012345679adbc", + "0xffffffff26f2fc170f69466a74defd8d", + "0x100000000000000000000000000000033", + "0x11ee12312312940000000000000000000000000002342343" + }; + const int C = 500000; + MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13"); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + std::string str, str2; + MontFp3 x(tbl[i]); + CYBOZU_BENCH_C("Mont:toStr", C, x.toStr, str, 16); + mpz_class y(tbl[i]); + CYBOZU_BENCH_C("Gmp:toStr ", C, mcl::Gmp::toStr, str2, y, 16); + str2.insert(0, "0x"); + CYBOZU_TEST_EQUAL(str, str2); + } +} + +CYBOZU_TEST_AUTO(fromStr16bench) +{ + const char *tbl[] = { + "0x0", + "0x5", + "0x123", + "0x123456789012345679adbc", + "0xffffffff26f2fc170f69466a74defd8d", + "0x100000000000000000000000000000033", + "0x11ee12312312940000000000000000000000000002342343" + }; + const int C = 500000; + MontFp3::setModulo("0xffffffffffffffffffffffffffffffffffffffffffffff13"); + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + std::string str = tbl[i]; + MontFp3 x; + CYBOZU_BENCH_C("Mont:fromStr", C, x.fromStr, str); + + mpz_class y; + str.erase(0, 2); + CYBOZU_BENCH_C("Gmp:fromStr ", C, mcl::Gmp::fromStr, y, str, 16); + x.toStr(str, 16); + std::string str2; + mcl::Gmp::toStr(str2, y, 16); + str2.insert(0, "0x"); + CYBOZU_TEST_EQUAL(str, str2); + } +} +#endif diff --git a/test/proj/ec_test/ec_test.vcxproj b/test/proj/ec_test/ec_test.vcxproj new file mode 100644 index 0000000..b141754 --- /dev/null +++ b/test/proj/ec_test/ec_test.vcxproj @@ -0,0 +1,88 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {46B6E88E-739A-406B-9F68-BC46C5950FA3} + Win32Proj + ec_test + + + + Application + true + v110 + MultiByte + + + Application + false + v110 + true + MultiByte + + + + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + + + + \ No newline at end of file diff --git a/test/proj/fp_test/fp_test.vcxproj b/test/proj/fp_test/fp_test.vcxproj new file mode 100644 index 0000000..a77dc21 --- /dev/null +++ b/test/proj/fp_test/fp_test.vcxproj @@ -0,0 +1,91 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {51266DE6-B57B-4AE3-B85C-282F170E1728} + Win32Proj + fp_test + + + + Application + true + v110 + MultiByte + + + Application + false + v110 + true + MultiByte + + + + + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(SolutionDir)../xbyak/;$(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include + + + Console + true + $(OutDir)$(TargetName)$(TargetExt) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(SolutionDir)../xbyak/;$(SolutionDir)../cybozulib/include;$(SolutionDir)../cybozulib_ext/mpir/include;$(SolutionDir)include + + + Console + true + true + true + + + + + + + + + \ No newline at end of file diff --git a/test/sq_test.cpp b/test/sq_test.cpp new file mode 100644 index 0000000..6174be6 --- /dev/null +++ b/test/sq_test.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + +CYBOZU_TEST_AUTO(sqrt) +{ + const int tbl[] = { 3, 5, 7, 11, 13, 17, 19, 257, 997, 1031 }; + mcl::SquareRoot sq; + for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { + const mpz_class p = tbl[i]; + sq.set(p); + for (mpz_class a = 1; a < p; a++) { + mpz_class x; + if (sq.get(x, a)) { + mpz_class y; + y = (x * x) % p; + CYBOZU_TEST_EQUAL(a, y); + } + } + } +}