65#if defined(FFTCONVOLVER_USE_SSE)
66 const size_t end4 = 4 * (len / 4);
67 for (
size_t i = 0; i < end4; i += 4)
69 const __m128 ra = _mm_load_ps(&reA[i]);
70 const __m128 rb = _mm_load_ps(&reB[i]);
71 const __m128 ia = _mm_load_ps(&imA[i]);
72 const __m128 ib = _mm_load_ps(&imB[i]);
73 __m128 real = _mm_load_ps(&re[i]);
74 __m128 imag = _mm_load_ps(&im[i]);
75 real = _mm_add_ps(real, _mm_mul_ps(ra, rb));
76 real = _mm_sub_ps(real, _mm_mul_ps(ia, ib));
77 _mm_store_ps(&re[i], real);
78 imag = _mm_add_ps(imag, _mm_mul_ps(ra, ib));
79 imag = _mm_add_ps(imag, _mm_mul_ps(ia, rb));
80 _mm_store_ps(&im[i], imag);
82 for (
size_t i = end4; i < len; ++i)
84 re[i] += reA[i] * reB[i] - imA[i] * imB[i];
85 im[i] += reA[i] * imB[i] + imA[i] * reB[i];
88 const size_t end4 = 4 * (len / 4);
89 for (
size_t i = 0; i < end4; i += 4)
91 re[i + 0] += reA[i + 0] * reB[i + 0] - imA[i + 0] * imB[i + 0];
92 re[i + 1] += reA[i + 1] * reB[i + 1] - imA[i + 1] * imB[i + 1];
93 re[i + 2] += reA[i + 2] * reB[i + 2] - imA[i + 2] * imB[i + 2];
94 re[i + 3] += reA[i + 3] * reB[i + 3] - imA[i + 3] * imB[i + 3];
95 im[i + 0] += reA[i + 0] * imB[i + 0] + imA[i + 0] * reB[i + 0];
96 im[i + 1] += reA[i + 1] * imB[i + 1] + imA[i + 1] * reB[i + 1];
97 im[i + 2] += reA[i + 2] * imB[i + 2] + imA[i + 2] * reB[i + 2];
98 im[i + 3] += reA[i + 3] * imB[i + 3] + imA[i + 3] * reB[i + 3];
100 for (
size_t i = end4; i < len; ++i)
102 re[i] += reA[i] * reB[i] - imA[i] * imB[i];
103 im[i] += reA[i] * imB[i] + imA[i] * reB[i];
void ComplexMultiplyAccumulate(SplitComplex &result, const SplitComplex &a, const SplitComplex &b)
Adds the complex product of two split-complex buffers to a result buffer.
Definition Utilities.cpp:53
void Sum(Sample *FFTCONVOLVER_RESTRICT result, const Sample *FFTCONVOLVER_RESTRICT a, const Sample *FFTCONVOLVER_RESTRICT b, size_t len)
Sums two given sample arrays.
Definition Utilities.cpp:36