new mulVec

5 years ago · 2f1b4c9a56
parent 0cec8d45ef
commit 2f1b4c9a56
2 changed files with 31 additions and 45 deletions
--- a/include/mcl/ec.hpp
+++ b/include/mcl/ec.hpp
@@ -1155,8 +1155,8 @@ public:
 		gmp::getNAFwidth(&b, naf, v, w);
 		assert(b); (void)b;
 		EcT P2;
-		tbl[0] = x;
 		dbl(P2, x);
+		tbl[0] = x;
 		for (size_t i = 1; i < tblSize; i++) {
 			add(tbl[i], tbl[i - 1], P2);
 		}
@@ -1191,69 +1191,58 @@ public:
 		mulArray(z, x, gmp::getUnit(y), gmp::getUnitSize(y), y < 0, constTime, false);
 	}
 	/*
-		z += sum_{i=0}^{n-1} xVec[i] * yVec[i]
+		z = sum_{i=0}^{m-1} xVec[i] * yVec[i], where m = min(N, n)
+		return m (the number of leading elements of xVec/yVec that were consumed)
 		@note &z != xVec[i]
 	*/
 private:
-	template<size_t N, class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline void addMulVecN(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
+	template<size_t N = 32, class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
+	static inline size_t addMulVecN(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
-		assert(n <= N);
-		EcT t;
+		if (n > N) n = N;
 		const int w = 5;
 		const size_t tblSize = 1 << (w - 2);
 		typedef mcl::FixedArray<int8_t, maxBitSize + 1> NafArray;
 		NafArray naf[N];
 		EcT tbl[N][tblSize];
-		bool b;
 		size_t maxBit = 0;
 		for (size_t i = 0; i < n; i++) {
+			bool b;
 			gmp::getNAFwidth(&b, naf[i], yVec[i].getMpz(), w);
 			assert(b); (void)b;
 			if (naf[i].size() > maxBit) maxBit = naf[i].size();
-			tbl[i][0] = xVec[i];
 			EcT P2;
-			EcT::dbl(P2, tbl[i][0]);
+			EcT::dbl(P2, xVec[i]);
+			tbl[i][0] = xVec[i];
 			for (size_t j = 1; j < tblSize; j++) {
 				EcT::add(tbl[i][j], tbl[i][j - 1], P2);
 			}
 		}
-		t.clear();
+		z.clear();
 		for (size_t i = 0; i < maxBit; i++) {
-			EcT::dbl(t, t);
+			EcT::dbl(z, z);
 			for (size_t j = 0; j < n; j++) {
-				local::addTbl(t, tbl[j], naf[j], maxBit - 1 - i);
+				local::addTbl(z, tbl[j], naf[j], maxBit - 1 - i);
 			}
 		}
-		z += t;
+		return n;
 	}

 public:
 	template<class tag, size_t maxBitSize, template<class _tag, size_t _maxBitSize>class FpT>
-	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n, bool old = false)
+	static inline void mulVec(EcT& z, const EcT *xVec, const FpT<tag, maxBitSize> *yVec, size_t n)
 	{
-		(void)old;
-#if 0
-if (!old) {
-		const size_t N = 16;
 		EcT r;
 		r.clear();
-		for (size_t i = 0; i < n; i += N) {
-			size_t remain = fp::min_(n - i, N);
-			addMulVecN<N>(r, xVec + i, yVec + i, remain);
-		}
-		z = r;
-} else {
-#else
-		EcT r, t;
-		r.clear();
-		for (size_t i = 0; i < n; i++) {
-			mul(t, xVec[i], yVec[i]);
+		while (n > 0) {
+			EcT t;
+			size_t done = addMulVecN(t, xVec, yVec, n);
 			r += t;
+			xVec += done;
+			yVec += done;
+			n -= done;
 		}
 		z = r;
-#endif
-//}
 	}
 #ifndef CYBOZU_DONT_USE_EXCEPTION
 	static inline void init(const std::string& astr, const std::string& bstr, int mode = ec::Jacobi)
--- a/test/ec_test.cpp
+++ b/test/ec_test.cpp
@@ -550,31 +550,28 @@ void mulVec(const mcl::EcParam& para)
 	const Fp y(para.gy);
 	Ec P(x, y);
 	P += P;
-	const int N = 20;
+	const int N = 33;
 	Ec xVec[N];
 	Zn yVec[N];
 	Ec Q1, Q2;

-	Ec::dbl(xVec[0], P);
-	for (size_t i = 1; i < N; i++) {
-		xVec[i] += P;
+	Ec::dbl(P, P);
+	for (size_t i = 0; i < N; i++) {
+		Ec::mul(xVec[i], P, i + 3);
+		yVec[i].setByCSPRNG();
 	}
-	const size_t nTbl[] = { 1, 2, 3, 5, 15, 16, 17 };
+	const size_t nTbl[] = { 1, 2, 3, 5, 30, 31, 32, 33 };
+	const int C = 400;
 	for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(nTbl); i++) {
 		const size_t n = nTbl[i];
+		CYBOZU_TEST_ASSERT(n <= N);
 		naiveMulVec(Q1, xVec, yVec, n);
 		Ec::mulVec(Q2, xVec, yVec, n);
 		CYBOZU_TEST_EQUAL(Q1, Q2);
+		printf("n=%zd\n", n);
+		CYBOZU_BENCH_C("naive ", C, naiveMulVec, Q1, xVec, yVec, n);
+		CYBOZU_BENCH_C("mulVec", C, Ec::mulVec, Q1, xVec, yVec, n);
 	}
-	const int C = 1000;
-	CYBOZU_BENCH_C("naive (1)", C, naiveMulVec, Q1, xVec, yVec, 1);
-	CYBOZU_BENCH_C("mulVec(1)", C, Ec::mulVec, Q1, xVec, yVec, 1);
-	CYBOZU_BENCH_C("naive (2)", C, naiveMulVec, Q1, xVec, yVec, 2);
-	CYBOZU_BENCH_C("mulVec(2)", C, Ec::mulVec, Q1, xVec, yVec, 2);
-	CYBOZU_BENCH_C("naive (3)", C, naiveMulVec, Q1, xVec, yVec, 3);
-	CYBOZU_BENCH_C("mulVec(3)", C, Ec::mulVec, Q1, xVec, yVec, 3);
-	CYBOZU_BENCH_C("naive (9)", C, naiveMulVec, Q1, xVec, yVec, 9);
-	CYBOZU_BENCH_C("mulVec(9)", C, Ec::mulVec, Q1, xVec, yVec, 9);
 }

 void test_sub_sub(const mcl::EcParam& para, mcl::fp::Mode fpMode)