add gen_mcl_fpDbl_sqrPre

dev
MITSUNARI Shigeo 9 years ago
parent c03a7cef95
commit 3d620c880a
  1. 33
      src/gen.cpp
  2. 27
      src/mul.txt

@ -24,6 +24,7 @@ struct Code : public mcl::Generator {
FunctionMap mulPvM;
FunctionMap mcl_fp_mul_UnitPreM;
FunctionMap mcl_fpDbl_mulPreM;
FunctionMap mcl_fpDbl_sqrPreM;
Code() : unit(0), unit2(0), bit(0), N(0) { }
void gen_mulUU()
@ -544,16 +545,9 @@ struct Code : public mcl::Generator {
ret(Void);
endFunc();
}
void gen_mcl_fpDbl_mulPre()
void generic_fpDbl_mul(Operand& pz, Operand& px, Operand& py)
{
const int bu = bit + unit;
resetGlobalIdx();
Operand pz(IntPtr, unit);
Operand px(IntPtr, unit);
Operand py(IntPtr, unit);
std::string name = "mcl_fpDbl_mulPre" + cybozu::itoa(bit);
mcl_fpDbl_mulPreM[bit] = Function(name, Void, pz, px, py);
beginFunc(mcl_fpDbl_mulPreM[bit]);
Operand y = load(py);
Operand xy = call(mulPvM[bit], px, y);
store(trunc(xy, unit), pz);
@ -574,6 +568,28 @@ struct Code : public mcl::Generator {
pz = bitcast(pz, Operand(IntPtr, bu));
store(t, pz);
ret(Void);
}
// Declares the generated function "mcl_fpDbl_mulPre" + <bit> with the
// operands (pz, px, py), records it in mcl_fpDbl_mulPreM[bit], and emits its
// body by delegating to generic_fpDbl_mul.
void gen_mcl_fpDbl_mulPre()
{
// Called before any operand of this function is declared.
// NOTE(review): presumably restarts the generator's value numbering - confirm
// in mcl::Generator.
resetGlobalIdx();
// The three operands are declared in signature order: pz, px, py.
Operand pz(IntPtr, unit);
Operand px(IntPtr, unit);
Operand py(IntPtr, unit);
// The emitted symbol name carries the current bit setting as a decimal suffix.
std::string name = "mcl_fpDbl_mulPre" + cybozu::itoa(bit);
// Stored in the map keyed by bit so the Function object can be looked up later.
mcl_fpDbl_mulPreM[bit] = Function(name, Void, pz, px, py);
beginFunc(mcl_fpDbl_mulPreM[bit]);
// The shared helper emits the whole body; only endFunc() is issued afterwards.
generic_fpDbl_mul(pz, px, py);
endFunc();
}
// Declares the generated function "mcl_fpDbl_sqrPre" + <bit> with the
// operands (py, px), records it in mcl_fpDbl_sqrPreM[bit], and emits its body
// by calling generic_fpDbl_mul with px supplied for both input operands.
void gen_mcl_fpDbl_sqrPre()
{
// Called before any operand of this function is declared.
// NOTE(review): presumably restarts the generator's value numbering - confirm
// in mcl::Generator.
resetGlobalIdx();
// py is declared first and is also the first argument of the signature below.
Operand py(IntPtr, unit);
Operand px(IntPtr, unit);
// The emitted symbol name carries the current bit setting as a decimal suffix.
std::string name = "mcl_fpDbl_sqrPre" + cybozu::itoa(bit);
mcl_fpDbl_sqrPreM[bit] = Function(name, Void, py, px);
beginFunc(mcl_fpDbl_sqrPreM[bit]);
// py takes the helper's pz slot; px fills both the px and py slots.
// NOTE(review): generic_fpDbl_mul takes its operands by non-const reference,
// so both input parameters alias the same Operand here - confirm the helper
// never reassigns its px or py parameter.
generic_fpDbl_mul(py, px, px);
endFunc();
}
void gen_all()
@ -598,6 +614,7 @@ struct Code : public mcl::Generator {
gen_mulPv();
gen_mcl_fp_mul_UnitPre();
gen_mcl_fpDbl_mulPre();
gen_mcl_fpDbl_sqrPre();
}
void setBit(uint32_t bit)
{

@ -1,30 +1,3 @@
@define bu = bit + unit
; Template for the squaring routine taking (py, px), both pointers to
; unit-wide words. Every step calls mulPv with px and one word loaded from px,
; adds the bu-wide result to the value carried from the previous step, and
; writes words of the running sum through py.
define void @mcl_fpDbl_sqrPre$(bit)(i$(unit)* %py, i$(unit)* %px) {
; Step 0: the word at px itself is the multiplier.
%x0 = load i$(unit)* %px
%xx0 = call i$(bu) @mulPv$(bit)x$(unit)(i$(unit) *%px, i$(unit) %x0)
; The low unit bits are stored at py; the remaining bits are carried as t0.
%s0 = trunc i$(bu) %xx0 to i$(unit)
store i$(unit) %s0, i$(unit)* %py
%t0 = lshr i$(bu) %xx0, $(unit)
; Steps from i = 1 onward. Index N-1 is referenced after the loop, so the loop
; is expected to reach N-1 (upper bound presumably exclusive - confirm in the
; template processor). The store and shift are emitted only while i < N-1.
@for i, 1, N
%px$(i) = getelementptr i$(unit)* %px, i32 $(i)
%x$(i) = load i$(unit)* %px$(i)
%xx$(i) = call i$(bu) @mulPv$(bit)x$(unit)(i$(unit)* %px, i$(unit) %x$(i))
%a$(i) = add i$(bu) %t$(i-1), %xx$(i)
%s$(i) = trunc i$(bu) %a$(i) to i$(unit)
%py$(i) = getelementptr i$(unit)* %py, i32 $(i)
@if i < N - 1
store i$(unit) %s$(i), i$(unit)* %py$(i)
%t$(i) = lshr i$(bu) %a$(i), $(unit)
@endif
@endfor
; Final step: the whole bu-wide sum from index N-1 is written with one store
; through a widened pointer, so the trunc result of that last step is not used.
%py$(N-1)e = bitcast i$(unit)* %py$(N-1) to i$(bu)*
store i$(bu) %a$(N-1), i$(bu)* %py$(N-1)e
ret void
}
@define bu = bit + unit
@define bu2 = bit + unit * 2

Loading…
Cancel
Save