From da49e9158cc858d16f2277e39bd86c0c714ce30f Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Wed, 6 Mar 2019 16:47:23 +0900 Subject: [PATCH] optimize Vint::div for small denominator --- include/mcl/vint.hpp | 54 ++++++++++++++++++++++++++++++++++++- test/vint_test.cpp | 64 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 113 insertions(+), 5 deletions(-) diff --git a/include/mcl/vint.hpp b/include/mcl/vint.hpp index c2a7a10..f9e1859 100644 --- a/include/mcl/vint.hpp +++ b/include/mcl/vint.hpp @@ -568,6 +568,7 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn) yn = getRealSize(y, yn); if (x == y) { assert(xn == yn); + x_is_y: clearN(r, rn); if (q) { q[0] = 1; @@ -579,6 +580,7 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn) /* if y > x then q = 0 and r = x */ + q_is_zero: copyN(r, x, xn); clearN(r + xn, rn - xn); if (q) clearN(q, qn); @@ -598,11 +600,61 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn) clearN(r + 1, rn - 1); return; } + const size_t yTopBit = cybozu::bsr(y[yn - 1]); assert(yn >= 2); + if (xn == yn) { + const size_t xTopBit = cybozu::bsr(x[xn - 1]); + if (xTopBit < yTopBit) goto q_is_zero; + if (yTopBit == xTopBit) { + int ret = compareNM(x, xn, y, yn); + if (ret == 0) goto x_is_y; + if (ret < 0) goto q_is_zero; + if (r) { + subN(r, x, y, yn); + } + if (q) { + q[0] = 1; + clearN(q + 1, qn - 1); + } + return; + } + assert(xTopBit > yTopBit); + // fast reduction for larger than fullbit-2 size p + if (yTopBit >= sizeof(T) * 8 - 3) { + T *xx = (T*)CYBOZU_ALLOCA(sizeof(T) * xn); + T qv = 0; + if (yTopBit == sizeof(T) * 8 - 2) { + copyN(xx, x, xn); + } else { + qv = x[xn - 1] >> (yTopBit + 1); + mulu1(xx, y, yn, qv); + subN(xx, x, xx, xn); + xn = getRealSize(xx, xn); + } + for (;;) { + T ret = subN(xx, xx, y, yn); + if (ret) { + addN(xx, xx, y, yn); + break; + } + qv++; + xn = getRealSize(xx, xn); + } + if (r) { + copyN(r, xx, xn); + clearN(r + xn, rn - xn); + } + if (q) { + q[0] = qv; + clearN(q + 1, qn - 1); + } + return; + } + } /* bitwise left shift x and y to adjust MSB of y[yn - 1] = 1 */ - const size_t shift = sizeof(T) * 8 - 1 - cybozu::bsr(y[yn - 1]); + const size_t shift = sizeof(T) * 8 - 1 - yTopBit; T *xx = (T*)CYBOZU_ALLOCA(sizeof(T) * (xn + 1)); const T *yy; if (shift) { diff --git a/test/vint_test.cpp b/test/vint_test.cpp index 0eea8a9..ca4cdbf 100644 --- a/test/vint_test.cpp +++ b/test/vint_test.cpp @@ -551,14 +551,70 @@ CYBOZU_TEST_AUTO(quotRem) "0xfffffffffffff0000000000000000000000000000000000000000000000000000000000000001", "521481209941628322292632858916605385658190900090571826892867289394157573281830188869820088065", }, + { + "0x1230000000000000456", + "0x1230000000000000457", + "0x1230000000000000456", + }, + { + "0x1230000000000000456", + "0x1230000000000000456", + "0", + }, + { + "0x1230000000000000456", + "0x1230000000000000455", + "1", + }, + { + "0x1230000000000000456", + "0x2000000000000000000", + "0x1230000000000000456", + }, + { + "0xffffffffffffffffffffffffffffffff", + "0x80000000000000000000000000000000", + "0x7fffffffffffffffffffffffffffffff", + }, + { + "0xffffffffffffffffffffffffffffffff", + "0x7fffffffffffffffffffffffffffffff", + "1", + }, + { + "0xffffffffffffffffffffffffffffffff", + "0x70000000000000000000000000000000", + "0x1fffffffffffffffffffffffffffffff", + }, + { + "0xffffffffffffffffffffffffffffffff", + "0x30000000000000000000000000000000", + "0x0fffffffffffffffffffffffffffffff", + }, + { + "0xffffffffffffffffffffffffffffffff", + "0x10000000000000000000000000000000", + "0x0fffffffffffffffffffffffffffffff", + }, + { + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "0x2523648240000001ba344d80000000086121000000000013a700000000000013", + "0x212ba4f27ffffff5a2c62effffffffcdb939ffffffffff8a15ffffffffffff8d", + }, + { + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "0x2523648240000001ba344d8000000007ff9f800000000010a10000000000000d", + "0x212ba4f27ffffff5a2c62effffffffd00242ffffffffff9c39ffffffffffffb1", + }, }; - mcl::Vint x, y, r; + mcl::Vint x, y, q, r1, r2; for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) { x.setStr(tbl[i].x); y.setStr(tbl[i].y); - r.setStr(tbl[i].r); - x %= y; - CYBOZU_TEST_EQUAL(x, r); + r1.setStr(tbl[i].r); + mcl::Vint::divMod(&q, r2, x, y); + CYBOZU_TEST_EQUAL(r1, r2); + CYBOZU_TEST_EQUAL(x, q * y + r2); } }