From ee1eef38c82df079172502f850a2623d08233bf4 Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo
Date: Thu, 21 Jan 2021 16:41:47 +0900
Subject: [PATCH] remove one adox in Fp::mul

---
 src/fp_generator.hpp | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/fp_generator.hpp b/src/fp_generator.hpp
index 892a8f5..9ccc986 100644
--- a/src/fp_generator.hpp
+++ b/src/fp_generator.hpp
@@ -1429,22 +1429,22 @@ private:
 	}
 	/*
 		c[n+2] = c[n+1] + px[n] * rdx
-		use rax
+		use rax, t0
 	*/
-	void mulAdd(const Pack& c, int n, const RegExp& px)
+	void mulAdd(const Pack& c, int n, const RegExp& px, const Reg64& t0)
 	{
 		const Reg64& a = rax;
-		xor_(a, a);
-		for (int i = 0; i < n; i++) {
-			mulx(c[n + 1], a, ptr [px + i * 8]);
+		xor_(c[n + 1], c[n + 1]); // c[n + 1] = 0
+		for (int i = 0; i < n - 1; i++) {
+			mulx(t0, a, ptr [px + i * 8]);
 			adox(c[i], a);
-			adcx(c[i + 1], c[n + 1]);
+			adcx(c[i + 1], t0);
 		}
-		mov(a, 0);
-		mov(c[n + 1], a);
-		adox(c[n], a);
-		adcx(c[n + 1], a);
-		adox(c[n + 1], a);
+		mulx(t0, a, ptr [px + (n - 1) * 8]);
+		adox(c[n - 1], a);
+		adox(t0, c[n + 1]); // carry o
+		adcx(c[n], t0);
+		adc(c[n + 1], 0);
 	}
 	/*
 		input
@@ -1481,18 +1481,17 @@ private:
 				}
 				std::swap(pt0, pt1);
 			}
-			mov(c[n], 0);
-			adc(c[n], *pt0);
+			adc(*pt0, 0);
+			mov(c[n], *pt0);
 		} else {
 			// c[7..0] = c[6..0] + px[5..0] * rdx
-			mulAdd(c, 6, px);
+			mulAdd(c, 6, px, t1);
 		}
-		mov(a, rp_);
-		mul(c[0]); // q = a
-		mov(d, a);
-		lea(t1, ptr[rip+pL_]);
+		mov(d, rp_);
+		imul(d, c[0]); // q = d
+		lea(t0, ptr[rip+pL_]);
 		// c += p * q
-		mulAdd(c, 6, t1);
+		mulAdd(c, 6, t0, t1);
 	}
 	/*
 		input (z, x, y) = (p0, p1, p2)