#include "tensorTypes.h" #ifdef CINTRIN #include #endif #ifdef STATS int mulCtr = 0; struct timespec mulTime = {0,0}; int addCtr = 0; struct timespec addTime = {0,0}; #endif //a = zipWith (*) a b void mulRq (hShort_t tupSize, hInt_t* a, hInt_t* b, hDim_t totm, hInt_t* qs) { #ifdef STATS mulCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif for(int tupIdx = 0; tupIdx < tupSize; tupIdx++) { hInt_t q = qs[tupIdx]; for(int i = 0; i < totm; i++) { a[i*tupSize+tupIdx] = (a[i*tupSize+tupIdx]*b[i*tupSize+tupIdx])%q; } } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); mulTime = tsAdd(mulTime, tsSubtract(t1,s1)); #endif } /* void mulMq (hShort_t tupSize, hInt_t* a, const hInt_t* b, const hDim_t totm, const hByte_t logr, const hInt_t k, const hInt_t q) { #ifdef STATS mulCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif hInt_t mask = (1<>logr; } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); mulTime = tsAdd(mulTime, tsSubtract(t1,s1)); #endif } */ void mulC (hShort_t tupSize, complex_t* a, complex_t* b, hDim_t totm) { #ifdef STATS mulCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif for(int i = 0; i < totm*tupSize; i++) { CMPLX_IMUL(a[i],b[i]); } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); mulTime = tsAdd(mulTime, tsSubtract(t1,s1)); #endif } //a = zipWith (+) a b void addRq (hShort_t tupSize, hInt_t* a, const hInt_t* b, const hDim_t totm, const hInt_t* qs) { #ifdef STATS addCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif /* need to check this code more carefully when tupSize > 1 #ifdef CINTRIN for(int tupIdx = 0; tupIdx < tupSize; tupIdx++) { hInt_t q = qs[tupIdx]; __m128i qss = _mm_set1_epi64x(q); for(int i = 0; i < totm; i+=2) { __m128i xs = _mm_load_si128((const __m128i*)(a+(i*tupSize+tupIdx))); __m128i ys = _mm_load_si128((const __m128i*)(b+(i*tupSize+tupIdx))); __m128i zs = _mm_add_epi64(xs,ys); zs = _mm_rem_epi64(zs,qss); _mm_store_si128((__m128i*)(a+(i*tupSize+tupIdx)),zs); } } #else */ for(int tupIdx = 0; tupIdx < tupSize; tupIdx++) { hInt_t q = qs[tupIdx]; for(int i = 0; i < totm; i++) { hInt_t temp = a[i*tupSize+tupIdx]+b[i*tupSize+tupIdx]; if (temp >= q) a[i*tupSize+tupIdx]=temp-q; else a[i*tupSize+tupIdx] = temp; } } //#endif #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); addTime = tsAdd(addTime, tsSubtract(t1,s1)); #endif } /* void addMq (hInt_t* a, const hInt_t* b, const hDim_t totm, const hByte_t logr, const hInt_t k, const hInt_t q) { #ifdef STATS addCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif hInt_t twoq = q<<1; for(int i = 0; i < totm; i++) { hInt_t temp = (a[i]+b[i]); if (temp >= twoq) a[i]=temp-twoq; else a[i] = temp; } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); addTime = tsAdd(addTime, tsSubtract(t1,s1)); #endif } */ //a = zipWith (+) a b void addR (hShort_t tupSize, hInt_t* a, hInt_t* b, hDim_t totm) { #ifdef STATS addCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif for(int i = 0; i < totm*tupSize; i++) { a[i] += b[i]; } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); addTime = tsAdd(addTime, tsSubtract(t1,s1)); #endif } void addC (hShort_t tupSize, complex_t* a, complex_t* b, hDim_t totm) { #ifdef STATS addCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif for(int i = 0; i < totm*tupSize; i++) { CMPLX_IADD(a[i],b[i]); } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); addTime = tsAdd(addTime, tsSubtract(t1,s1)); #endif } void addD (hShort_t tupSize, double* a, double* b, hDim_t totm) { #ifdef STATS addCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif for(int i = 0; i < totm*tupSize; i++) { a[i]+=b[i]; } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); addTime = tsAdd(addTime, tsSubtract(t1,s1)); #endif }