diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index a296801..b1457df 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -40,6 +40,7 @@ typedef void (*void3u)(Unit*, const Unit*, const Unit*); typedef void (*void4u)(Unit*, const Unit*, const Unit*, const Unit*); typedef int (*int2u)(Unit*, const Unit*); +typedef Unit (*u1uII)(Unit*, Unit, Unit); typedef Unit (*u3u)(Unit*, const Unit*, const Unit*); struct Block { diff --git a/src/fp_proto.hpp b/src/fp_proto.hpp index f6d4b54..8f99839 100644 --- a/src/fp_proto.hpp +++ b/src/fp_proto.hpp @@ -9,6 +9,11 @@ #include #include +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4127) +#endif + namespace mcl { namespace fp { struct Gtag; // GMP @@ -38,6 +43,7 @@ template struct AddPre { static inline Unit func(Unit *z, const Unit *x, const Unit *y) { + if (N == 0) return 0; return mpn_add_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); } static const u3u f; @@ -45,6 +51,18 @@ struct AddPre { template const u3u AddPre::f = &AddPre::func; +// (carry, x[N]) <- x[N] + y +template +struct AddUnitPre { + static inline Unit func(Unit *x, Unit n, Unit y) + { + return mpn_add_1((mp_limb_t*)x, (const mp_limb_t*)x, (int)n, y); + } + static const u1uII f; +}; +template +const u1uII AddUnitPre::f = &AddUnitPre::func; + // (carry, z[N]) <- x[N] - y[N] template struct SubPre { @@ -78,9 +96,47 @@ const void3u Neg::f = Neg::func; // z[N * 2] <- x[N] * y[N] template struct MulPre { + /* + W = 1 << H + x = aW + b, y = cW + d + xy = acW^2 + (ad + bc)W + bd + ad + bc = (a + b)(c + d) - ac - bd + */ + static inline void karatsuba(Unit *z, const Unit *x, const Unit *y) + { + const size_t H = N / 2; + MulPre::f(z, x, y); // bd + MulPre::f(z + N, x + H, y + H); // ac + Unit a_b[H]; + Unit c_d[H]; + Unit c1 = AddPre::f(a_b, x, x + H); // a + b + Unit c2 = AddPre::f(c_d, y, y + H); // c + d + Unit tmp[N + H]; + MulPre::f(tmp, a_b, c_d); + Unit c = c1 & c2; + if (c1) { + c += AddPre::f(tmp + H, tmp + H, c_d); + } + if (c2) { + c += AddPre::f(tmp + H, tmp + H, a_b); + } + // c:tmp[N] = (a + b)(c + d) + c -= SubPre::f(tmp, tmp, z); + c -= SubPre::f(tmp, tmp, z + N); + // c:tmp[N] = ad + bc + c += AddPre::f(z + H, z + H, tmp); + AddUnitPre::f(z + H, N, c); + } static inline void func(Unit *z, const Unit *x, const Unit *y) { - return mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); +#if 0 + if (N == 0) return; + if (N > 2 && (N % 2) == 0) { + karatsuba(z, x, y); + return; + } +#endif + mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, N); } static const void3u f; }; @@ -88,6 +144,22 @@ struct MulPre { template const void3u MulPre::f = &MulPre::func; + +#if 0 +template +struct MulPre<0, Tag> { + static inline void f(Unit*, const Unit*, const Unit*){} +}; + +template +struct MulPre<1, Tag> { + static inline void f(Unit* z, const Unit* x, const Unit* y) + { + mpn_mul_n((mp_limb_t*)z, (const mp_limb_t*)x, (const mp_limb_t*)y, 1); + } +}; +#endif + // z[N * 2] <- x[N] * x[N] template struct SqrPre { @@ -419,3 +491,6 @@ MCL_FP_DEF_FUNC_SPECIAL(A) #endif +#ifdef _WIN32 + #pragma warning(pop) +#endif