optimize Vint::div for small denominator

pull/2/head
MITSUNARI Shigeo 6 years ago
parent fa696b564b
commit da49e9158c
  1. 54
      include/mcl/vint.hpp
  2. 64
      test/vint_test.cpp

@ -568,6 +568,7 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn)
yn = getRealSize(y, yn);
if (x == y) {
assert(xn == yn);
x_is_y:
clearN(r, rn);
if (q) {
q[0] = 1;
@ -579,6 +580,7 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn)
/*
if y > x then q = 0 and r = x
*/
q_is_zero:
copyN(r, x, xn);
clearN(r + xn, rn - xn);
if (q) clearN(q, qn);
@ -598,11 +600,61 @@ void divNM(T *q, size_t qn, T *r, const T *x, size_t xn, const T *y, size_t yn)
clearN(r + 1, rn - 1);
return;
}
const size_t yTopBit = cybozu::bsr(y[yn - 1]);
assert(yn >= 2);
if (xn == yn) {
const size_t xTopBit = cybozu::bsr(x[xn - 1]);
if (xTopBit < yTopBit) goto q_is_zero;
if (yTopBit == xTopBit) {
int ret = compareNM(x, xn, y, yn);
if (ret == 0) goto x_is_y;
if (ret < 0) goto q_is_zero;
if (r) {
subN(r, x, y, yn);
}
if (q) {
q[0] = 1;
clearN(q + 1, qn - 1);
}
return;
}
assert(xTopBit > yTopBit);
// fast reduction for larger than fullbit-2 size p
if (yTopBit >= sizeof(T) * 8 - 3) {
T *xx = (T*)CYBOZU_ALLOCA(sizeof(T) * xn);
T qv = 0;
if (yTopBit == sizeof(T) * 8 - 2) {
copyN(xx, x, xn);
} else {
qv = x[xn - 1] >> (yTopBit + 1);
mulu1(xx, y, yn, qv);
subN(xx, x, xx, xn);
xn = getRealSize(xx, xn);
}
for (;;) {
T ret = subN(xx, xx, y, yn);
if (ret) {
addN(xx, xx, y, yn);
break;
}
qv++;
xn = getRealSize(xx, xn);
}
if (r) {
copyN(r, xx, xn);
clearN(r + xn, rn - xn);
}
if (q) {
q[0] = qv;
clearN(q + 1, qn - 1);
}
return;
}
}
/*
bitwise left shift x and y to adjust MSB of y[yn - 1] = 1
*/
const size_t shift = sizeof(T) * 8 - 1 - cybozu::bsr(y[yn - 1]);
const size_t shift = sizeof(T) * 8 - 1 - yTopBit;
T *xx = (T*)CYBOZU_ALLOCA(sizeof(T) * (xn + 1));
const T *yy;
if (shift) {

@ -551,14 +551,70 @@ CYBOZU_TEST_AUTO(quotRem)
"0xfffffffffffff0000000000000000000000000000000000000000000000000000000000000001",
"521481209941628322292632858916605385658190900090571826892867289394157573281830188869820088065",
},
{
"0x1230000000000000456",
"0x1230000000000000457",
"0x1230000000000000456",
},
{
"0x1230000000000000456",
"0x1230000000000000456",
"0",
},
{
"0x1230000000000000456",
"0x1230000000000000455",
"1",
},
{
"0x1230000000000000456",
"0x2000000000000000000",
"0x1230000000000000456",
},
{
"0xffffffffffffffffffffffffffffffff",
"0x80000000000000000000000000000000",
"0x7fffffffffffffffffffffffffffffff",
},
{
"0xffffffffffffffffffffffffffffffff",
"0x7fffffffffffffffffffffffffffffff",
"1",
},
{
"0xffffffffffffffffffffffffffffffff",
"0x70000000000000000000000000000000",
"0x1fffffffffffffffffffffffffffffff",
},
{
"0xffffffffffffffffffffffffffffffff",
"0x30000000000000000000000000000000",
"0x0fffffffffffffffffffffffffffffff",
},
{
"0xffffffffffffffffffffffffffffffff",
"0x10000000000000000000000000000000",
"0x0fffffffffffffffffffffffffffffff",
},
{
"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
"0x2523648240000001ba344d80000000086121000000000013a700000000000013",
"0x212ba4f27ffffff5a2c62effffffffcdb939ffffffffff8a15ffffffffffff8d",
},
{
"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
"0x2523648240000001ba344d8000000007ff9f800000000010a10000000000000d",
"0x212ba4f27ffffff5a2c62effffffffd00242ffffffffff9c39ffffffffffffb1",
},
};
mcl::Vint x, y, r;
mcl::Vint x, y, q, r1, r2;
for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
x.setStr(tbl[i].x);
y.setStr(tbl[i].y);
r.setStr(tbl[i].r);
x %= y;
CYBOZU_TEST_EQUAL(x, r);
r1.setStr(tbl[i].r);
mcl::Vint::divMod(&q, r2, x, y);
CYBOZU_TEST_EQUAL(r1, r2);
CYBOZU_TEST_EQUAL(x, q * y + r2);
}
}

Loading…
Cancel
Save