diff --git a/src/low_armv7.s b/src/low_armv7.s
index 7d252e4..7c70f86 100644
--- a/src/low_armv7.s
+++ b/src/low_armv7.s
@@ -57,3 +57,24 @@ mcl_fp_addNC128:
 	pop {r4, lr}
 	bx lr
 
+	.globl mcl_fp_addNC256
+	.align 2
+mcl_fp_addNC256:	@ 8 x 32-bit limb add: stores [r1] + [r2] to [r0], lowest limb first
+	push {r4, r5, r6, r7, r8, lr}	@ lr is saved because it is reused as scratch below
+	ldm r1!, {r3, r4, r5, r6}	@ limbs 0..3 of the first operand; r1 advances 16 bytes
+	ldm r2!, {r7, r8, r12, lr}	@ limbs 0..3 of the second operand; r2 advances 16 bytes
+	adds r3, r3, r7	@ lowest limb: plain add, sets the carry flag
+	adcs r4, r4, r8	@ every later limb adds the incoming carry and updates it
+	adcs r5, r5, r12
+	adcs r6, r6, lr
+	stm r0!, {r3, r4, r5, r6}	@ write limbs 0..3; r0 advances 16 bytes
+
+	ldm r1!, {r3, r4, r5, r6}	@ limbs 4..7; ldm/stm leave the flags alone, so carry survives
+	ldm r2!, {r7, r8, r12, lr}
+	adcs r3, r3, r7	@ continues the carry chain from limb 3
+	adcs r4, r4, r8
+	adcs r5, r5, r12
+	adcs r6, r6, lr	@ top limb; its carry-out is not stored anywhere
+	stm r0!, {r3, r4, r5, r6}	@ write limbs 4..7
+	pop {r4, r5, r6, r7, r8, lr}	@ restore the registers saved on entry
+	bx lr
diff --git a/test/low_test.cpp b/test/low_test.cpp
index 80ee474..478620b 100644
--- a/test/low_test.cpp
+++ b/test/low_test.cpp
@@ -11,6 +11,7 @@ cybozu::XorShift rg;
 extern "C" void mcl_fp_addNC64(uint32_t *z, const uint32_t *x, const uint32_t *y);
 extern "C" void mcl_fp_addNC96(uint32_t *z, const uint32_t *x, const uint32_t *y);
 extern "C" void mcl_fp_addNC128(uint32_t *z, const uint32_t *x, const uint32_t *y);
+extern "C" void mcl_fp_addNC256(uint32_t *z, const uint32_t *x, const uint32_t *y);
 
 template
 void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y);
@@ -20,6 +21,7 @@ void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y);
 DEF_ADD(64)
 DEF_ADD(96)
 DEF_ADD(128)
+DEF_ADD(256)
 
 #define CAT(S, BIT) "S##BIT"
 
@@ -45,11 +47,12 @@ void benchAdd()
 CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); }
 CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); }
 CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); }
+CYBOZU_TEST_AUTO(addNC256) { benchAdd<256>(); }
 #if 0
-CYBOZU_TEST_AUTO(addNC128)
+CYBOZU_TEST_AUTO(addNC)
 {
 	using namespace mcl::fp;
-	const size_t bit = 128;
+	const size_t bit = 256;
 	const size_t N = bit / UnitBitSize;
 	Unit x[N], y[N];
 	for (int i = 0; i < 10; i++) {
@@ -60,11 +63,12 @@ CYBOZU_TEST_AUTO(addNC128)
 		low_add(z, x, y);
 		addNC(w, x, y);
 		CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
-		mcl_fp_addNC128_2(w, x, y);
+		mcl_fp_addNC256_2(w, x, y);
 		CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
 	}
 	std::string name = "name" + cybozu::itoa(bit);
 	CYBOZU_BENCH(name.c_str(), addNC, x, x, y);
-	CYBOZU_BENCH("ad128", mcl_fp_addNC128_2, x, x, y);
+	CYBOZU_BENCH("ad256", mcl_fp_addNC256_2, x, x, y);
 }
 #endif
+