61 static constexpr size_t n =
sizeof (vSIMDType) /
sizeof (ScalarType);
62 static constexpr size_t mask = (
sizeof (vSIMDType) /
sizeof (ScalarType)) - 1;
63 static constexpr size_t bits = SIMDInternal::Log2Helper<(int) n>::value;
66 using MaskType = SIMDInternal::MaskType<ScalarType>;
67 union UnionType { vSIMDType v; ScalarType s[n]; };
68 union UnionMaskType { vSIMDType v; MaskType m[n]; };
72 static forcedinline vSIMDType add (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarAdd> (a, b); }
73 static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarSub> (a, b); }
74 static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarMul> (a, b); }
75 static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b)
noexcept {
return bitapply<ScalarAnd> (a, b); }
76 static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b)
noexcept {
return bitapply<ScalarOr > (a, b); }
77 static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b)
noexcept {
return bitapply<ScalarXor> (a, b); }
78 static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b)
noexcept {
return bitapply<ScalarNot> (a, b); }
80 static forcedinline vSIMDType min (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarMin> (a, b); }
81 static forcedinline vSIMDType max (vSIMDType a, vSIMDType b)
noexcept {
return apply<ScalarMax> (a, b); }
82 static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b)
noexcept {
return cmp<ScalarEq > (a, b); }
83 static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b)
noexcept {
return cmp<ScalarNeq> (a, b); }
84 static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b)
noexcept {
return cmp<ScalarGt > (a, b); }
85 static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b)
noexcept {
return cmp<ScalarGeq> (a, b); }
/** Takes a vector v and a lane index i and yields a single ScalarType.
    NOTE(review): presumably returns lane i of v; confirm, and confirm
    whether callers must guarantee i < n.
*/
87 static forcedinline ScalarType get (vSIMDType v,
size_t i)
noexcept
/** Takes a vector v, a lane index i and a scalar s, and yields a vector.
    NOTE(review): presumably returns a copy of v with lane i replaced by
    s; confirm, and confirm whether callers must guarantee i < n.
*/
93 static forcedinline vSIMDType set (vSIMDType v,
size_t i, ScalarType s)
noexcept
/** Overlays av with a UnionMaskType so its lanes can be handled as
    MaskType values, then visits each of the n lanes.
    NOTE(review): presumably every mask lane is complemented; confirm.
*/
101 static forcedinline vSIMDType bit_not (vSIMDType av)
noexcept
103 UnionMaskType a {av};
105 for (
size_t i = 0; i < n; ++i)
/** Adds the lanes a.s[0] .. a.s[n - 1] into a running total that starts
    at ScalarType zero.

    The total is cast back to ScalarType after every addition, so when
    ScalarType is narrower than int the promoted intermediate result is
    narrowed again at each step.
*/
111 static forcedinline ScalarType sum (vSIMDType av)
noexcept
114 auto retval =
static_cast<ScalarType
> (0);
116 for (
size_t i = 0; i < n; ++i)
117 retval =
static_cast<ScalarType
> (retval + a.s[i]);
/** Replaces each lane a.s[i] with its value converted to int and then
    back to ScalarType.

    NOTE(review): the intermediate type is int, so a lane value that int
    cannot represent is not preserved by the round trip; confirm this is
    acceptable for every ScalarType this is instantiated with.
*/
122 static forcedinline vSIMDType truncate (vSIMDType av)
noexcept
126 for (
size_t i = 0; i < n; ++i)
127 a.s[i] =
static_cast<ScalarType
> (
static_cast<int> (a.s[i]));
/** Lane-wise multiply-accumulate: with a, b and c overlaying av, bv and
    cv, every lane of a becomes a.s[i] + b.s[i] * c.s[i].
    NOTE(review): presumably the updated a.v is returned; confirm.
*/
132 static forcedinline vSIMDType multiplyAdd (vSIMDType av, vSIMDType bv, vSIMDType cv)
noexcept
134 UnionType a {av}, b {bv}, c {cv};
136 for (
size_t i = 0; i < n; ++i)
137 a.s[i] += b.s[i] * c.s[i];
/** Walks the n lanes of av and bv side by side and tests each pair with
    exactlyEqual; the `if` triggers on a pair that is not exactly equal.
    NOTE(review): presumably returns false at the first such pair and
    true when none is found; confirm.
*/
143 static forcedinline
bool allEqual (vSIMDType av, vSIMDType bv)
noexcept
145 UnionType a {av}, b {bv};
147 for (
size_t i = 0; i < n; ++i)
148 if (! exactlyEqual (a.s[i], b.s[i]))
/** Complex multiplication on interleaved lanes.

    Lanes are read as (real, imaginary) pairs: lane 2i holds the real
    part and lane 2i + 1 the imaginary part of the i-th complex number.
    Each of the m = n / 2 pairs from av is multiplied by the matching
    pair from bv using std::complex, and the real and imaginary parts of
    the product are written to the same two lane positions of r.
    NOTE(review): presumably r.v is returned; confirm.
*/
155 static forcedinline vSIMDType cmplxmul (vSIMDType av, vSIMDType bv)
noexcept
157 UnionType a {av}, b {bv}, r;
// m is the number of complex pairs held in the vector (half the lane count).
159 const int m = n >> 1;
160 for (
int i = 0; i < m; ++i)
// (i<<1) is the real lane of pair i, (i<<1)|1 its imaginary lane.
162 std::complex<ScalarType> result
163 = std::complex<ScalarType> (a.s[i<<1], a.s[(i<<1)|1])
164 * std::complex<ScalarType> (b.s[i<<1], b.s[(i<<1)|1]);
166 r.s[i<<1] = result.real();
167 r.s[(i<<1)|1] = result.imag();
173 struct ScalarAdd {
static forcedinline ScalarType op (ScalarType a, ScalarType b)
noexcept {
return a + b; } };
174 struct ScalarSub {
static forcedinline ScalarType op (ScalarType a, ScalarType b)
noexcept {
return a - b; } };
175 struct ScalarMul {
static forcedinline ScalarType op (ScalarType a, ScalarType b)
noexcept {
return a * b; } };
176 struct ScalarMin {
static forcedinline ScalarType op (ScalarType a, ScalarType b)
noexcept {
return jmin (a, b); } };
177 struct ScalarMax {
static forcedinline ScalarType op (ScalarType a, ScalarType b)
noexcept {
return jmax (a, b); } };
178 struct ScalarAnd {
static forcedinline MaskType op (MaskType a, MaskType b)
noexcept {
return a & b; } };
179 struct ScalarOr {
static forcedinline MaskType op (MaskType a, MaskType b)
noexcept {
return a | b; } };
180 struct ScalarXor {
static forcedinline MaskType op (MaskType a, MaskType b)
noexcept {
return a ^ b; } };
181 struct ScalarNot {
static forcedinline MaskType op (MaskType a, MaskType b)
noexcept {
return (~a) & b; } };
182 struct ScalarEq {
static forcedinline
bool op (ScalarType a, ScalarType b)
noexcept {
return exactlyEqual (a, b); } };
183 struct ScalarNeq {
static forcedinline
bool op (ScalarType a, ScalarType b)
noexcept {
return ! exactlyEqual (a, b); } };
184 struct ScalarGt {
static forcedinline
bool op (ScalarType a, ScalarType b)
noexcept {
return (a > b); } };
185 struct ScalarGeq {
static forcedinline
bool op (ScalarType a, ScalarType b)
noexcept {
return (a >= b); } };
/** Generic lane-wise binary operation on scalar lanes.

    Overlays av and bv with UnionType and, for each of the n lanes,
    stores Op::op (a.s[i], b.s[i]) back into a.s[i]. Op is expected to
    expose a static op() taking two ScalarType values.
    NOTE(review): presumably the updated a.v is returned; confirm.
*/
188 template <
typename Op>
189 static forcedinline vSIMDType apply (vSIMDType av, vSIMDType bv)
noexcept
191 UnionType a {av}, b {bv};
193 for (
size_t i = 0; i < n; ++i)
194 a.s[i] = Op::op (a.s[i], b.s[i]);
/** Generic lane-wise comparison producing a mask.

    For each of the n lanes, evaluates Op::op (a.s[i], b.s[i]) on the
    scalar lanes of av and bv, and writes static_cast<MaskType> (-1) to
    r.m[i] when the predicate holds and static_cast<MaskType> (0) when
    it does not.
    NOTE(review): r is presumably a UnionMaskType whose v member is
    returned; confirm.
*/
199 template <
typename Op>
200 static forcedinline vSIMDType cmp (vSIMDType av, vSIMDType bv)
noexcept
202 UnionType a {av}, b {bv};
205 for (
size_t i = 0; i < n; ++i)
206 r.m[i] = Op::op (a.s[i], b.s[i]) ?
static_cast<MaskType
> (-1) :
static_cast<MaskType
> (0);
/** Generic lane-wise binary operation on mask lanes.

    Overlays av and bv with UnionMaskType and, for each of the n lanes,
    stores Op::op (a.m[i], b.m[i]) back into a.m[i]. Op is expected to
    expose a static op() taking two MaskType values.
    NOTE(review): presumably the updated a.v is returned; confirm.
*/
211 template <
typename Op>
212 static forcedinline vSIMDType bitapply (vSIMDType av, vSIMDType bv)
noexcept
214 UnionMaskType a {av}, b {bv};
216 for (
size_t i = 0; i < n; ++i)
217 a.m[i] = Op::op (a.m[i], b.m[i]);
/** Builds a vector from a single scalar s, visiting each of the n lanes.
    NOTE(review): presumably every lane is set to s; confirm.
*/
222 static forcedinline vSIMDType expand (ScalarType s)
noexcept
226 for (
size_t i = 0; i < n; ++i)
/** Builds a vector from the memory at a, visiting each of the n lanes.
    NOTE(review): presumably lane i is read from a[i], so a must point to
    at least n readable ScalarType values; confirm.
*/
232 static forcedinline vSIMDType load (
const ScalarType* a)
noexcept
236 for (
size_t i = 0; i < n; ++i)
/** Writes the vector av out through dest, visiting each of the n lanes.
    NOTE(review): presumably lane i is written to dest[i], so dest must
    have room for at least n ScalarType values; confirm.
*/
242 static forcedinline
void store (vSIMDType av, ScalarType* dest)
noexcept
246 for (
size_t i = 0; i < n; ++i)
/** Rearranges lanes according to a compile-time index word.

    For output lane i, a field is taken from shuffle_idx at bit offset
    bits * i, ANDed with mask, and used as the index of the source lane:
    r.s[i] = a.s[field].

    NOTE(review): shuffle_idx is an unsigned int, so bits * i has to stay
    below the bit width of unsigned int for the shift to be defined;
    confirm this holds for the largest lane count n in use.
    NOTE(review): a presumably overlays av and r.v is presumably
    returned; confirm.
*/
250 template <
unsigned int shuffle_
idx>
251 static forcedinline vSIMDType shuffle (vSIMDType av)
noexcept
257 for (
size_t i = 0; i < n; ++i)
258 r.s[i] = a.s[(shuffle_idx >> (bits * i)) & mask];