From ac0e9d9813e3f1da294505b5689d13b7d1d95429 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Thu, 1 Nov 2018 21:48:44 +0900 Subject: [PATCH] enable Fp::sqr for bls12 --- src/fp.cpp | 13 ++++++------- src/fp_generator.hpp | 26 +++++++++++++++++--------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/fp.cpp b/src/fp.cpp index 8db755d..473b26c 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -499,18 +499,17 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, Mode mode, size_t mclMaxBi fpDbl_mod = &mcl::vint::mcl_fpDbl_mod_SECP256K1; } #endif - if (!fp::initForMont(*this, p, mode)) return false; - { - bool b; - sq.set(&b, mp); - if (!b) return false; - } if (N * UnitBitSize <= 256) { hash = sha256; } else { hash = sha512; } - return true; + { + bool b; + sq.set(&b, mp); + if (!b) return false; + } + return fp::initForMont(*this, p, mode); } void copyUnitToByteAsLE(uint8_t *dst, const Unit *src, size_t byteSize) diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index 8f55607..0749e44 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -836,13 +836,19 @@ private: gen_montMul4(); return func; } - if (pn_ == 6 && useMulx_ && useAdx_) { + if (pn_ == 6 && !isFullBit_ && useMulx_ && useAdx_) { // gen_montMul6(p_, rp_); - StackFrame sf(this, 3, 10 | UseRDX, (1 + 12) * 8); - mov(ptr[rsp + 12 * 8], gp0); + if (mulPreL.getAddress() == 0 || fpDbl_modL.getAddress() == 0) return 0; + StackFrame sf(this, 3, 10 | UseRDX, 12 * 8); + /* + use xm3 + rsp + [0, ..12 * 8) ; mul(x, y) + */ + movq(xm3, gp0); mov(gp0, rsp); call(mulPreL); // gp0, x, y - mov(gp0, ptr[rsp + 12 * 8]); + movq(gp0, xm3); mov(gp1, rsp); call(fpDbl_modL); return func; @@ -1154,7 +1160,7 @@ private: ret(); return func; } - if (pn_ == 6 && !isFullBit_ && useAdx_) { + if (pn_ == 6 && !isFullBit_ && useMulx_ && useAdx_) { StackFrame sf(this, 3, 10 | UseRDX, 0, false); call(fpDbl_modL); sf.close(); @@ -1203,20 +1209,22 @@ private: #endif return func; } - if (pn_ == 6 && useMulx_ && useAdx_) { - StackFrame sf(this, 3, 10 | UseRDX, (1 + 12 + 6) * 8); + if (pn_ == 6 && !isFullBit_ && useMulx_ && useAdx_) { + if (fpDbl_modL.getAddress() == 0) return 0; + StackFrame sf(this, 3, 10 | UseRDX, (12 + 6) * 8); /* + use xm3 rsp - [(12 + 6 * 8] ; gp0 [6 * 8, (12 + 6) * 8) ; sqrPre(x, x) [0..6 * 8) ; stack for sqrPre6 */ - mov(ptr[rsp + (12 + 6) * 8], gp0); + movq(xm3, gp0); Pack t = sf.t; t.append(sf.p[2]); // sqrPre6 uses 6 * 8 bytes stack sqrPre6(rsp + 6 * 8, sf.p[1], t); mov(gp0, ptr[rsp + (12 + 6) * 8]); + movq(gp0, xm3); lea(gp1, ptr[rsp + 6 * 8]); call(fpDbl_modL); return func;