/* * Common functions used by filters, decimators and resamplers. * Includes real and complex dot products and SIMD horizontal additions. * TODO: better performance might be possible by avoiding unaligned memory accesses. */ #include /* * Real horizontal addition */ static inline __m128 sse_hadd_R(__m128 in){ __m128 accum; accum = _mm_hadd_ps(in, in); return _mm_hadd_ps(accum, accum); } static inline __m128 avx_hadd_R(__m256 in){ __m128 res1 = _mm256_extractf128_ps(in, 0); __m128 res2 = _mm256_extractf128_ps(in, 1); res1 = _mm_hadd_ps(res1, res1); res1 = _mm_hadd_ps(res1, res1); res2 = _mm_hadd_ps(res2, res2); res2 = _mm_hadd_ps(res2, res2); return _mm_add_ss(res1, res2); } /* * Real dot products */ static inline float dotprod_R(int num, float *a, float *b){ int i; float accum = 0; for(i=0; i