From 629496d1a1178ca6274ca154085e0fe9e9c012bd Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 31 Oct 2018 18:39:59 +0900 Subject: [PATCH] fix cast --- src/fp_generator.hpp | 56 ++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp index 446260d..9e42414 100644 --- a/src/fp_generator.hpp +++ b/src/fp_generator.hpp @@ -296,7 +296,7 @@ private: op.fp_negA_ = gen_fp_neg(); - const void* func = 0; + void* func = 0; // setup fp_tower op.fp2_mulNF = 0; func = gen_fpDbl_add(); @@ -320,10 +320,11 @@ private: op.fp_mul = reinterpret_cast(func); // used in toMont/fromMont op.fp_mulA_ = reinterpret_cast(func); } + func = gen_sqr(); + if (func) { + op.fp_sqrA_ = reinterpret_cast(func); + } if (op.N > 4) return; - align(16); - op.fp_sqrA_ = getCurr(); - gen_sqr(); if (op.primeMode != PM_NIST_P192 && op.N <= 4) { // support general op.N but not fast for op.N > 4 align(16); op.fp_preInv = getCurr(); @@ -685,10 +686,10 @@ private: outLocalLabel(); return func; } - const void* gen_fpDbl_add() + void* gen_fpDbl_add() { align(16); - const void* func = getCurr(); + void* func = getCurr(); if (pn_ <= 4) { int tn = pn_ * 2 + (isFullBit_ ? 1 : 0); StackFrame sf(this, 3, tn); @@ -713,10 +714,10 @@ private: } return 0; } - const void* gen_fpDbl_sub() + void* gen_fpDbl_sub() { align(16); - const void* func = getCurr(); + void* func = getCurr(); if (pn_ <= 4) { int tn = pn_ * 2; StackFrame sf(this, 3, tn); @@ -817,10 +818,10 @@ private: mov(ptr [pz + (pn_ - 1) * 8], *t0); return func; } - const void* gen_mul() + void* gen_mul() { align(16); - const void* func = getCurr(); + void* func = getCurr(); if (op_->primeMode == PM_NIST_P192) { StackFrame sf(this, 3, 10 | UseRDX, 8 * 6); mulPre3(rsp, sf.p[1], sf.p[2], sf.t); @@ -835,7 +836,7 @@ private: gen_montMul4(); return func; } - if (pn_ == 6 && useAdx_) { + if (pn_ == 6 && useMulx_ && useAdx_) { // gen_montMul6(p_, rp_); StackFrame sf(this, 3, 10 | UseRDX, (1 + 12) * 8); mov(ptr[rsp + 12 * 8], gp0); @@ -1120,10 +1121,10 @@ private: movq(z, xm0); store_mr(z, Pack(t10, t9, t8, t4)); } - const void* gen_fpDbl_mod(const fp::Op& op) + void* gen_fpDbl_mod(const fp::Op& op) { align(16); - const void* func = getCurr(); + void* func = getCurr(); if (op.primeMode == PM_NIST_P192) { StackFrame sf(this, 2, 6 | UseRDX); fpDbl_mod_NIST_P192(sf.p[0], sf.p[1], sf.t); @@ -1166,18 +1167,32 @@ private: } return 0; } - void gen_sqr() + void* gen_sqr() { + align(16); + void* func = getCurr(); if (op_->primeMode == PM_NIST_P192) { StackFrame sf(this, 3, 10 | UseRDX, 6 * 8); Pack t = sf.t; t.append(sf.p[2]); sqrPre3(rsp, sf.p[1], t); fpDbl_mod_NIST_P192(sf.p[0], rsp, sf.t); + return func; } if (pn_ == 3) { gen_montSqr3(); - return; + return func; + } + if (pn_ == 6 && useMulx_ && useAdx_) { + StackFrame sf(this, 3, 10 | UseRDX, (1 + 12) * 8); + mov(ptr[rsp + 12 * 8], gp0); + mov(gp0, rsp); + mov(gp2, gp1); + call(mulPreL); // gp0, x, y + mov(gp0, ptr[rsp + 12 * 8]); + mov(gp1, rsp); + call(fpDbl_modL); + return func; } #if 0 // (sqrPre + mod) is slower than mul if (pn_ == 4 && useMulx_) { @@ -1188,7 +1203,7 @@ private: mov(gp0, sf.p[0]); mov(gp1, rsp); call(fpDbl_modL); - return; + return func; } #endif // sqr(y, x) = mul(y, x, x) @@ -1198,6 +1213,7 @@ private: mov(rdx, rsi); #endif jmp((const void*)op_->fp_mulA_); + return func; } /* input (pz[], px[], py[]) @@ -2134,10 +2150,10 @@ private: movq(z, xm0); store_mr(z, zp); } - const void* gen_fpDbl_sqrPre(const fp::Op&/* op */) + void* gen_fpDbl_sqrPre(const fp::Op&/* op */) { align(16); - const void* func = getCurr(); + void* func = getCurr(); if (pn_ == 2 && useMulx_) { StackFrame sf(this, 2, 7 | UseRDX); sqrPre2(sf.p[0], sf.p[1], sf.t); @@ -2175,10 +2191,10 @@ private: return func; #endif } - const void* gen_fpDbl_mulPre() + void* gen_fpDbl_mulPre() { align(16); - const void* func = getCurr(); + void* func = getCurr(); if (pn_ == 2 && useMulx_) { StackFrame sf(this, 3, 5 | UseRDX); mulPre2(sf.p[0], sf.p[1], sf.p[2], sf.t);