add mcl_fp_addNC{160,192,224}

dev
MITSUNARI Shigeo 9 years ago
parent bdc0ffd8fe
commit 2737a8e97a
  1. 65
      src/low_armv7.s
  2. 11
      test/low_test.cpp

@ -72,6 +72,65 @@ cl_fp_addNC128_2:
pop {r4, r5, r6, lr}
bx lr
@-----------------------------------------------------------------------
@ void mcl_fp_addNC160(uint32_t *z, const uint32_t *x, const uint32_t *y)
@
@ z[0..4] = x[0..4] + y[0..4]: 160-bit addition over five 32-bit words,
@ least-significant word first.  The carry out of the top word is not
@ stored or returned; it is only left in the C flag.
@
@ In:    r0 = z, r1 = x, r2 = y
@ Clobb: r0-r3, r12, flags (r4 and lr are saved and restored)
@ Note:  each group of words is loaded from x and y before the matching
@        words of z are stored, so z may be the same buffer as x or y.
@-----------------------------------------------------------------------
        .globl  mcl_fp_addNC160
        @ Mark the symbol as a function (STT_FUNC) so the linker can treat
        @ calls to it as interworking-capable; `%` is used because `@`
        @ starts a comment in ARM syntax.
        .type   mcl_fp_addNC160, %function
        .align  2
mcl_fp_addNC160:
        push    {r4, lr}                @ r4 is callee-saved; lr is reused as scratch
        ldm     r1!, {r3, r4}           @ r3 = x[0], r4 = x[1]; r1 -> x[2]
        ldm     r2!, {r12, lr}          @ r12 = y[0], lr = y[1]; r2 -> y[2]
        adds    r3, r3, r12             @ word 0 starts the carry chain
        adcs    r4, r4, lr              @ word 1
        stm     r0!, {r3, r4}           @ z[0..1]; ldm/stm leave the carry untouched
        ldm     r1, {r1, r3, r4}        @ r1 = x[2], r3 = x[3], r4 = x[4] (pointer no longer needed)
        ldm     r2, {r2, r12, lr}       @ r2 = y[2], r12 = y[3], lr = y[4]
        adcs    r1, r1, r2              @ word 2, continuing the carry from word 1
        adcs    r3, r3, r12             @ word 3
        adcs    r4, r4, lr              @ word 4
        stm     r0, {r1, r3, r4}        @ z[2..4]
        pop     {r4, lr}
        bx      lr
        .size   mcl_fp_addNC160, .-mcl_fp_addNC160
@-----------------------------------------------------------------------
@ void mcl_fp_addNC192(uint32_t *z, const uint32_t *x, const uint32_t *y)
@
@ z[0..5] = x[0..5] + y[0..5]: 192-bit addition over six 32-bit words,
@ least-significant word first.  The carry out of the top word is not
@ stored or returned; it is only left in the C flag.
@
@ In:    r0 = z, r1 = x, r2 = y
@ Clobb: r0-r3, r12, flags (r4-r6 and lr are saved and restored)
@ Note:  each group of words is loaded from x and y before the matching
@        words of z are stored, so z may be the same buffer as x or y.
@-----------------------------------------------------------------------
        .globl  mcl_fp_addNC192
        @ Mark the symbol as a function (STT_FUNC) so the linker can treat
        @ calls to it as interworking-capable; `%` is used because `@`
        @ starts a comment in ARM syntax.
        .type   mcl_fp_addNC192, %function
        .align  2
mcl_fp_addNC192:
        push    {r4, r5, r6, lr}        @ r4-r6 are callee-saved; lr is reused as scratch
        ldm     r1!, {r3, r4, r5}       @ r3..r5 = x[0..2]; r1 -> x[3]
        ldm     r2!, {r6, r12, lr}      @ r6, r12, lr = y[0..2]; r2 -> y[3]
        adds    r3, r3, r6              @ word 0 starts the carry chain
        adcs    r4, r4, r12             @ word 1
        adcs    r5, r5, lr              @ word 2
        stm     r0!, {r3, r4, r5}       @ z[0..2]; ldm/stm leave the carry untouched
        ldm     r1, {r3, r4, r5}        @ r3..r5 = x[3..5]
        ldm     r2, {r6, r12, lr}       @ r6, r12, lr = y[3..5]
        adcs    r3, r3, r6              @ word 3, continuing the carry from word 2
        adcs    r4, r4, r12             @ word 4
        adcs    r5, r5, lr              @ word 5
        stm     r0, {r3, r4, r5}        @ z[3..5]
        pop     {r4, r5, r6, lr}
        bx      lr
        .size   mcl_fp_addNC192, .-mcl_fp_addNC192
@-----------------------------------------------------------------------
@ void mcl_fp_addNC224(uint32_t *z, const uint32_t *x, const uint32_t *y)
@
@ z[0..6] = x[0..6] + y[0..6]: 224-bit addition over seven 32-bit words,
@ least-significant word first.  The carry out of the top word is not
@ stored or returned; it is only left in the C flag.
@
@ In:    r0 = z, r1 = x, r2 = y
@ Clobb: r0-r3, r12, flags (r4-r6 and lr are saved and restored)
@ Note:  each group of words is loaded from x and y before the matching
@        words of z are stored, so z may be the same buffer as x or y.
@-----------------------------------------------------------------------
        .globl  mcl_fp_addNC224
        @ Mark the symbol as a function (STT_FUNC) so the linker can treat
        @ calls to it as interworking-capable; `%` is used because `@`
        @ starts a comment in ARM syntax.
        .type   mcl_fp_addNC224, %function
        .align  2
mcl_fp_addNC224:
        push    {r4, r5, r6, lr}        @ r4-r6 are callee-saved; lr is reused as scratch
        ldm     r1!, {r3, r4, r5}       @ r3..r5 = x[0..2]; r1 -> x[3]
        ldm     r2!, {r6, r12, lr}      @ r6, r12, lr = y[0..2]; r2 -> y[3]
        adds    r3, r3, r6              @ word 0 starts the carry chain
        adcs    r4, r4, r12             @ word 1
        adcs    r5, r5, lr              @ word 2
        stm     r0!, {r3, r4, r5}       @ z[0..2]; ldm/stm leave the carry untouched
        ldm     r1, {r1, r3, r4, r5}    @ r1 = x[3], r3..r5 = x[4..6] (pointer no longer needed)
        ldm     r2, {r2, r6, r12, lr}   @ r2 = y[3], r6, r12, lr = y[4..6]
        adcs    r1, r1, r2              @ word 3, continuing the carry from word 2
        adcs    r3, r3, r6              @ word 4
        adcs    r4, r4, r12             @ word 5
        adcs    r5, r5, lr              @ word 6
        stm     r0, {r1, r3, r4, r5}    @ z[3..6]
        pop     {r4, r5, r6, lr}
        bx      lr
        .size   mcl_fp_addNC224, .-mcl_fp_addNC224
.globl mcl_fp_addNC256
.align 2
mcl_fp_addNC256:
@ -84,12 +143,12 @@ mcl_fp_addNC256:
adcs r6, r6, lr
stm r0!, {r3, r4, r5, r6}
ldm r1!, {r3, r4, r5, r6}
ldm r2!, {r7, r8, r12, lr}
ldm r1, {r3, r4, r5, r6}
ldm r2, {r7, r8, r12, lr}
adcs r3, r3, r7
adcs r4, r4, r8
adcs r5, r5, r12
adcs r6, r6, lr
stm r0!, {r3, r4, r5, r6}
stm r0, {r3, r4, r5, r6}
pop {r4, r5, r6, r7, r8, lr}
bx lr

@ -11,6 +11,9 @@ cybozu::XorShift rg;
// C-linkage prototypes for the addition routines under test. Each takes an
// output word array z and two input word arrays x and y of 32-bit words.
// For the mcl_fp_addNC<N> entries the suffix is the operand width in bits
// (presumably N/32 words per array for every variant -- confirm for the
// ones whose bodies are not visible here).
extern "C" void mcl_fp_addNC64(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC96(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC128(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC160(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC192(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC224(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC256(uint32_t *z, const uint32_t *x, const uint32_t *y);
// NOTE(review): add_test shares the same signature; its definition and
// purpose are outside this view.
extern "C" void add_test(uint32_t *z, const uint32_t *x, const uint32_t *y);
@ -22,6 +25,9 @@ void addNC(uint32_t *z, const uint32_t *x, const uint32_t *y);
// Expand DEF_ADD once for every bit width from 64 to 256 in steps of 32.
// (DEF_ADD itself is defined outside this view.)
DEF_ADD(64)
DEF_ADD(96)
DEF_ADD(128)
DEF_ADD(160)
DEF_ADD(192)
DEF_ADD(224)
DEF_ADD(256)
// NOTE(review): the preprocessor does not expand `##` (or macro parameters)
// inside a string literal, so CAT(S, BIT) always yields the literal text
// "S##BIT" regardless of its arguments. If a name such as "add64" was
// intended, stringizing (#S #BIT) would be required -- confirm against the
// macro's users before changing it.
#define CAT(S, BIT) "S##BIT"
@ -48,8 +54,11 @@ void benchAdd()
// One test case per operand width; each body only calls benchAdd
// instantiated with that bit width.
CYBOZU_TEST_AUTO(addNC64) { benchAdd<64>(); }
CYBOZU_TEST_AUTO(addNC96) { benchAdd<96>(); }
CYBOZU_TEST_AUTO(addNC128) { benchAdd<128>(); }
CYBOZU_TEST_AUTO(addNC160) { benchAdd<160>(); }
CYBOZU_TEST_AUTO(addNC192) { benchAdd<192>(); }
CYBOZU_TEST_AUTO(addNC224) { benchAdd<224>(); }
CYBOZU_TEST_AUTO(addNC256) { benchAdd<256>(); }
#if 1
#if 0
CYBOZU_TEST_AUTO(addNC)
{
using namespace mcl::fp;

Loading…
Cancel
Save