#include #include #include "tensorTypes.h" // this function takes *inverse* RUs, so no negation is needed on the indexing // I had been negating the ru-idx, but this was causing a *negative* mod, resulting in a hard-to-find bug void primeD (double *y, hShort_t tupSize, hDim_t lts, hDim_t rts, hDim_t p, hDim_t rustride, complex_t* ruinv) { if(p == 2) { return; } hDim_t blockOffset, modOffset, tensorOffset; double *tempSpace = (double*)malloc((p-1)*sizeof(double)); hDim_t temp1 = rts*(p-1); for(blockOffset = 0; blockOffset < lts; blockOffset++) { hDim_t temp2 = blockOffset*temp1; for(modOffset = 0; modOffset < rts; modOffset++) { tensorOffset = temp2 + modOffset; hDim_t row, col; for(row = 0; row < p-1; row++) { double acc = 0; for(col = 1; col <= (p>>1); col++) { acc += 2 * ruinv[((row*col) % p)*rustride*tupSize].real * y[(tensorOffset+rts*(col-1))*tupSize]; } for(col = (p>>1)+1; col <= p-1; col++) { acc += 2 * ruinv[((row*col) % p)*rustride*tupSize].imag * y[(tensorOffset+rts*(col-1))*tupSize]; } tempSpace[row] = acc/sqrt(2); } for(row = 0; row < p-1; row++) { y[(tensorOffset+rts*row)*tupSize] = tempSpace[row]; } } } free(tempSpace); } void ppD (void *y, hShort_t tupSize, hDim_t lts, hDim_t rts, PrimeExponent pe, void *ruinv, hInt_t* qs) { hDim_t p = pe.prime; hDim_t e = pe.exponent; #ifdef DEBUG_MODE ASSERT(e != 0); #endif hDim_t mprime = ipow(p,e-1); for(int tupIdx = 0; tupIdx < tupSize; tupIdx++) { primeD (((double*)y)+tupIdx, tupSize, lts*mprime, rts, p, mprime, ((complex_t*)ruinv)+tupIdx); } } //the contents of y will be destroyed, but should be initialized in Haskell-land to independent Guassians over the reals void tensorGaussianDec (hShort_t tupSize, double *y, hDim_t totm, PrimeExponent *peArr, hShort_t sizeOfPE, complex_t** ruinv) { void** ruinvs = (void**)malloc(sizeOfPE*sizeof(void*)); hShort_t i; for(i = 0; i < sizeOfPE; i++) { ruinvs[i] = (void*) (ruinv[i]); } tensorFuserCRT (y, tupSize, ppD, totm, peArr, sizeOfPE, ruinvs, (hInt_t*)0); free(ruinvs); }