Keep only the fast version of mcl_fp_addNC64 and remove the separate mcl_fp_addNC64_2 entry point

dev
MITSUNARI Shigeo 9 years ago
parent 295766369e
commit 0a8c81ff15
  1. 13
      src/low_armv7.s
  2. 6
      test/low_test.cpp

@ -1,22 +1,9 @@
.arch armv7-a .arch armv7-a
.global mcl_fp_addNC64 .global mcl_fp_addNC64
.global mcl_fp_addNC64_2
.align 2 .align 2
mcl_fp_addNC64: mcl_fp_addNC64:
ldrd r2, [r2]
stmfd sp!, {r4, r5}
ldrd r4, [r1]
adds r2, r2, r4
adc r3, r3, r5
ldmfd sp!, {r4, r5}
strd r2, [r0]
bx lr
# slow
.align 2
mcl_fp_addNC64_2:
ldm r1, {r3, r12} ldm r1, {r3, r12}
ldm r2, {r1, r2} ldm r2, {r1, r2}
adds r1, r1, r3 adds r1, r1, r3

@ -8,7 +8,6 @@
cybozu::XorShift rg; cybozu::XorShift rg;
extern "C" void mcl_fp_addNC64(uint32_t *z, const uint32_t *x, const uint32_t *y); extern "C" void mcl_fp_addNC64(uint32_t *z, const uint32_t *x, const uint32_t *y);
extern "C" void mcl_fp_addNC64_2(uint32_t *z, const uint32_t *x, const uint32_t *y);
CYBOZU_TEST_AUTO(addNC) CYBOZU_TEST_AUTO(addNC)
{ {
@ -23,12 +22,7 @@ CYBOZU_TEST_AUTO(addNC)
low_add<N>(z, x, y); low_add<N>(z, x, y);
mcl_fp_addNC64(w, x, y); mcl_fp_addNC64(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N); CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
mcl_fp_addNC64_2(w, x, y);
CYBOZU_TEST_EQUAL_ARRAY(z, w, N);
} }
CYBOZU_BENCH("add1", mcl_fp_addNC64, x, x, y); CYBOZU_BENCH("add1", mcl_fp_addNC64, x, x, y);
CYBOZU_BENCH("add2", mcl_fp_addNC64_2, x, x, y);
CYBOZU_BENCH("add1", mcl_fp_addNC64, x, x, y);
CYBOZU_BENCH("add2", mcl_fp_addNC64_2, x, x, y);
} }

Loading…
Cancel
Save