From b7873f91096f6b6759afbdfe032d13aab9e9075c Mon Sep 17 00:00:00 2001
From: MITSUNARI Shigeo
Date: Fri, 22 Jul 2016 15:16:21 +0900
Subject: [PATCH] add test of mulPre768(not run)

---
 Makefile         |  2 +-
 sample/large.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6d7d3c8..30ff027 100644
--- a/Makefile
+++ b/Makefile
@@ -34,7 +34,7 @@ LLVM_SRC=src/base$(BIT).ll
 # CPU is used for llvm
 # see $(LLVM_LLC) --version
 LLVM_FLAGS=-march=$(CPU) -relocation-model=pic #-misched=ilpmax
-#LLVM_FLAGS+=-pre-RA-sched=list-ilp -max-sched-reorder=128
+LLVM_FLAGS+=-pre-RA-sched=list-ilp -max-sched-reorder=128
 
 HAS_BMI2=$(shell cat "/proc/cpuinfo" | grep bmi2 >/dev/null && echo "1")
 ifeq ($(HAS_BMI2),1)
diff --git a/sample/large.cpp b/sample/large.cpp
index 5397697..5a6e37a 100644
--- a/sample/large.cpp
+++ b/sample/large.cpp
@@ -8,7 +8,33 @@
 
 typedef mcl::FpT<> Fp;
 typedef mcl::fp::Unit Unit;
+using namespace mcl::fp;
+#include "../src/low_gmp.hpp"
 
+const size_t N = 12; // operand length in Units shared by mulPre768 and testMul
+
+void mulPre768(Unit *pz, const Unit *px, const Unit *py) // pz receives the product; it is written at pz and at pz + N, so it presumably needs 2 * N Units (testMul passes b[N * 2])
+{
+	/* Karatsuba-style product: three half-size multiplications instead of four.
+		W = 1 << H
+		(aW + b)(cW + d) = acW^2 + (ad + bc)W + bd
+		ad + bc = (a + b)(c + d) - ac - bd
+	*/
+	const size_t H = N / 2; // split point: low halves start at px / py, high halves at px + H / py + H
+	low_mul(pz, px, py); // bd -- NOTE(review): no explicit size argument here although testMul calls low_mul<12>; confirm against the repository
+	low_mul(pz + N, px + H, py + H); // ac
+	Unit a_b[H + 1]; // a + b; slot H stores the value returned by low_add (presumably the carry)
+	Unit c_d[H + 1]; // c + d; same layout as a_b
+	a_b[H] = low_add(a_b, px, px + H); // a + b
+	c_d[H] = low_add(c_d, py, py + H); // c + d
+	Unit work[N + H] = {}; // zero-initialised scratch for the middle term (ad + bc)
+	low_mul(work, a_b, c_d); // (a + b)(c + d), presumably over the low H Units only -- TODO confirm
+	if (c_d[H]) low_add(work + H, work + H, c_d); // correction when c + d overflowed H Units; NOTE(review): the matching cross term would be a_b, not c_d -- confirm
+	if (a_b[H]) low_add(work + H, work + H, a_b); // correction when a + b overflowed; NOTE(review): likewise c_d is expected here, and the returned carry is discarded
+	work[N] -= low_sub(work, work, pz); // subtract bd; the value returned by low_sub is taken off work[N]
+	work[N] -= low_sub(work, work, pz + N); // subtract ac
+	low_add(pz + H, pz + H, work); // add the middle term into the result at offset H (the W position)
+}
 void test(const std::string& pStr, mcl::fp::Mode mode)
 {
 	printf("test %s\n", mcl::fp::ModeToStr(mode));
@@ -31,6 +57,24 @@ void test(const std::string& pStr, mcl::fp::Mode mode)
 	CYBOZU_BENCH("mul", Fp::mul, x, x, x);
 }
 
+void testMul() // compares mulPre768 with low_mul<12> on fixed inputs, then benchmarks mulPre768
+{
+	mcl::fp::Unit ux[N], uy[N], a[N * 2], b[N * 2]; // ux, uy: inputs; a: reference product; b: mulPre768 product
+	for (size_t i = 0; i < N; i++) {
+		ux[i] = -i * i + 5; // deterministic pattern; -i wraps around because i is unsigned
+		uy[i] = -i * i + 9;
+	}
+	low_mul<12>(a, ux, uy); // reference result
+	mulPre768(b, ux, uy); // result under test
+	for (size_t i = 0; i < N * 2; i++) {
+		if (a[i] != b[i]) { // report every Unit that differs
+			printf("ERR %016llx %016llx\n", (long long)a[i], (long long)b[i]);
+		}
+	}
+	puts("end testMul"); // printed whether or not mismatches were found
+	CYBOZU_BENCH("mulPre768", mulPre768, ux, ux, uy); // NOTE(review): the output aliases input ux, which has only N Units while mulPre768 writes at pz + N -- confirm before enabling
+}
+
 void testAll(const std::string& pStr)
 {
 	test(pStr, mcl::fp::FP_GMP);
@@ -49,6 +93,7 @@ int main()
 		};
 		testAll(pTbl[0]);
 		testAll(pTbl[1]);
+//		testMul();
 	} catch (std::exception& e) {
 		printf("err %s\n", e.what());
 		puts("make clean");