#include "tensorTypes.h" void gPowR (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p) { hDim_t tmp1 = rts*(p-1); hDim_t tmp2 = tmp1 - rts; hDim_t blockOffset, modOffset; hDim_t i; for (blockOffset = 0; blockOffset < lts; ++blockOffset) { hDim_t tmp3 = blockOffset * tmp1; for (modOffset = 0; modOffset < rts; ++modOffset) { hDim_t tensorOffset = tmp3 + modOffset; hInt_t last = y[tensorOffset + tmp2]; for (i = p-2; i != 0; --i) { hDim_t idx = tensorOffset + i * rts; y[idx] += last - y[idx-rts]; } y[tensorOffset] += last; } } } void gPowRq (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p, hInt_t q) { hDim_t tmp1 = rts*(p-1); hDim_t tmp2 = tmp1 - rts; hDim_t blockOffset, modOffset; hDim_t i; for (blockOffset = 0; blockOffset < lts; ++blockOffset) { hDim_t tmp3 = blockOffset * tmp1; for (modOffset = 0; modOffset < rts; ++modOffset) { hDim_t tensorOffset = tmp3 + modOffset; hInt_t last = y[tensorOffset + tmp2]; for (i = p-2; i != 0; --i) { hDim_t idx = tensorOffset + i * rts; y[idx] = (y[idx] + last - y[idx-rts]) % q; } y[tensorOffset] = (y[tensorOffset] + last) % q; } } } void ppGPowR (void* y, PrimeExponent pe, hDim_t lts, hDim_t rts, hInt_t q) { #ifdef DEBUG_MODE ASSERT (q==0); #endif hDim_t p = pe.prime; hShort_t e = pe.exponent; if (p != 2) { gPowR ((hInt_t*)y, lts*ipow(p,e-1), rts, p); } } void ppGPowRq (void* y, PrimeExponent pe, hDim_t lts, hDim_t rts, hInt_t q) { hDim_t p = pe.prime; hShort_t e = pe.exponent; if (p != 2) { gPowRq ((hInt_t*)y, lts*ipow(p,e-1), rts, p, q); } } void gDecR (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p) { hDim_t tmp1 = rts*(p-1); hDim_t blockOffset; hDim_t modOffset; hDim_t i; for (blockOffset = 0; blockOffset < lts; ++blockOffset) { hDim_t tmp2 = blockOffset * tmp1; for (modOffset = 0; modOffset < rts; ++modOffset) { hDim_t tensorOffset = tmp2 + modOffset; hInt_t acc = y[tensorOffset]; for (i = p-2; i != 0; --i) { hDim_t idx = tensorOffset + i * rts; acc += y[idx]; y[idx] -= y[idx-rts]; } y[tensorOffset] += acc; } } } void gDecRq (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p, hInt_t q) { hDim_t tmp1 = rts*(p-1); hDim_t blockOffset; hDim_t modOffset; hDim_t i; for (blockOffset = 0; blockOffset < lts; ++blockOffset) { hDim_t tmp2 = blockOffset * tmp1; for (modOffset = 0; modOffset < rts; ++modOffset) { hDim_t tensorOffset = tmp2 + modOffset; hInt_t acc = y[tensorOffset]; for (i = p-2; i != 0; --i) { hDim_t idx = tensorOffset + i * rts; // acc is at most p*q << 64 bits, so no need to mod acc = acc + y[idx]; y[idx] = (y[idx] - y[idx-rts]) % q; } y[tensorOffset] = (y[tensorOffset] + acc) % q; } } } void ppGDecR (void* y, PrimeExponent pe, hDim_t lts, hDim_t rts, hInt_t q) { #ifdef DEBUG_MODE ASSERT (q==0); #endif hDim_t p = pe.prime; hShort_t e = pe.exponent; if (p != 2) { gDecR ((hInt_t*)y, lts*ipow(p,e-1), rts, p); } } void ppGDecRq (void* y, PrimeExponent pe, hDim_t lts, hDim_t rts, hInt_t q) { hDim_t p = pe.prime; hShort_t e = pe.exponent; if (p != 2) { gDecRq ((hInt_t*)y, lts*ipow(p,e-1), rts, p, q); } } void gInvPowR (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p) { hDim_t tmp1 = rts * (p-1); hDim_t blockOffset, modOffset; hDim_t i; for (blockOffset = 0; blockOffset < lts; ++blockOffset) { hDim_t tmp2 = blockOffset * tmp1; for (modOffset = 0; modOffset < rts; ++modOffset) { hDim_t tensorOffset = tmp2 + modOffset; hInt_t lelts = 0; for (i = 0; i < p-1; ++i) { lelts += y[tensorOffset + i*rts]; } hInt_t relts = 0; for (i = p-2; i >= 0; --i) { hDim_t idx = tensorOffset + i*rts; hInt_t z = y[idx]; y[idx] = (p-1-i) * lelts - (i+1)*relts; lelts -= z; relts += z; } } } } void gInvPowRq (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p, hInt_t q) { hDim_t tmp1 = rts * (p-1); hDim_t blockOffset, modOffset; hDim_t i; for (blockOffset = 0; blockOffset < lts; ++blockOffset) { hDim_t tmp2 = blockOffset * tmp1; for (modOffset = 0; modOffset < rts; ++modOffset) { hDim_t tensorOffset = tmp2 + modOffset; hInt_t lelts = 0; //lelts is at most p*q, so we can mod once at the end for (i = 0; i < p-1; ++i) { lelts = lelts + y[tensorOffset + i*rts]; } lelts = lelts % q; //in the next loop, lelts <= p*q and relts <= p*q //products are <= p*p*q, and diff is <= 2*p*p*q //so we assume 2*p^2 << 31 bits hInt_t relts = 0; for (i = p-2; i >= 0; --i) { hDim_t idx = tensorOffset + i*rts; hInt_t z = y[idx]; y[idx] = (((p-1-i) * lelts) - ((i+1)*relts)) % q; lelts -= z; relts += z; } } } } void ppGInvPowR (void* y, PrimeExponent pe, hDim_t lts, hDim_t rts, hInt_t q) { #ifdef DEBUG_MODE ASSERT (q==0); #endif hDim_t p = pe.prime; hShort_t e = pe.exponent; if (p != 2) { gInvPowR ((hInt_t*)y, lts*ipow(p,e-1), rts, p); } } void ppGInvPowRq (void* y, PrimeExponent pe, hDim_t lts, hDim_t rts, hInt_t q) { hDim_t p = pe.prime; hShort_t e = pe.exponent; if (p != 2) { gInvPowRq ((hInt_t*)y, lts*ipow(p,e-1), rts, p, q); } } //do not call for p=2! void gCRTRq (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p, hInt_t* gcoeffs, hInt_t q) { hDim_t gindex; hDim_t blockOffset, modOffset, idx; hDim_t temp1 = rts*(p-1); for(gindex = 0; gindex < p-1; gindex++) { hInt_t coeff = gcoeffs[gindex]; hDim_t temp3 = gindex*rts; for(blockOffset = 0; blockOffset < lts; blockOffset++) { hDim_t temp2 = blockOffset*temp1 + temp3; for(modOffset = 0; modOffset < rts; modOffset++) { idx = temp2 + modOffset; y[idx] = (y[idx]*coeff)%q; } } } } //do not call for p=2! void gCRTC (complex_t* y, hDim_t lts, hDim_t rts, hDim_t p, complex_t* gcoeffs) { hDim_t gindex; hDim_t blockOffset, modOffset, idx; hDim_t temp1 = rts*(p-1); for(gindex = 0; gindex < p-1; gindex++) { complex_t coeff = gcoeffs[gindex]; hDim_t temp3 = gindex*rts; for(blockOffset = 0; blockOffset < lts; blockOffset++) { hDim_t temp2 = blockOffset*temp1 + temp3; for(modOffset = 0; modOffset < rts; modOffset++) { idx = temp2 + modOffset; CMPLX_IMUL(y[idx],coeff); } } } } void ppGCRTRq (void* y, hDim_t lts, hDim_t rts, PrimeExponent pe, void* gcoeffs, hInt_t q) { hDim_t p = pe.prime; hShort_t e = pe.exponent; #ifdef DEBUG_MODE printf("gcoeffs for p=%" PRId32 ", e=%" PRId16 "\t[", pe.prime, pe.exponent); int i; for(i = 0; i < ((p-1)*ipow(p,e-1)); i++) { printf("%" PRId64 ",", ((hInt_t*)gcoeffs)[i]); } printf("]\n"); #endif if (p != 2) { gCRTRq ((hInt_t*)y, lts*ipow(p,e-1), rts, p, (hInt_t*)gcoeffs, q); } } void ppGCRTC (void* y, hDim_t lts, hDim_t rts, PrimeExponent pe, void* gcoeffs, hInt_t q) { hDim_t p = pe.prime; hShort_t e = pe.exponent; #ifdef DEBUG_MODE printf("gcoeffs for p=%" PRId32 ", e=%" PRId16 "\t[", pe.prime, pe.exponent); int i; for(i = 0; i < ((p-1)*ipow(p,e-1)); i++) { printf("(%f,%f),", ((complex_t*)gcoeffs)[i].real, ((complex_t*)gcoeffs)[i].imag); } printf("]\n"); #endif if (p != 2) { gCRTC ((complex_t*)y, lts*ipow(p,e-1), rts, p, (complex_t*)gcoeffs); } } void gInvDecR (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p) { hDim_t blockOffset; hDim_t modOffset; hDim_t i; hDim_t tmp1 = rts*(p-1); for (blockOffset = 0; blockOffset < lts; ++blockOffset) { hDim_t tmp2 = blockOffset*tmp1; for (modOffset = 0; modOffset < rts; ++modOffset) { hDim_t tensorOffset = tmp2 + modOffset; hInt_t lastOut = 0; for (i=1; i < p; ++i) { lastOut += i * y[tensorOffset + (i-1)*rts]; } hInt_t acc = lastOut / p; ASSERT (acc * p == lastOut); // this line asserts that lastOut % p == 0, without calling % operator for (i = p-2; i > 0; --i) { hDim_t idx = tensorOffset + i*rts; hInt_t tmp = acc; acc -= y[idx]; // we already divided acc by p, do not multiply y[idx] by p y[idx] = tmp; } y[tensorOffset] = acc; } } } void gInvDecRq (hInt_t* y, hDim_t lts, hDim_t rts, hDim_t p, hInt_t q) { hDim_t blockOffset; hDim_t modOffset; hDim_t i; hDim_t tmp1 = rts*(p-1); hInt_t reciprocalOfP = reciprocal (q,p); for (blockOffset = 0; blockOffset < lts; ++blockOffset) { hDim_t tmp2 = blockOffset*tmp1; for (modOffset = 0; modOffset < rts; ++modOffset) { hDim_t tensorOffset = tmp2 + modOffset; hInt_t lastOut = 0; for (i=1; i < p; ++i) { lastOut += (i * y[tensorOffset + (i-1)*rts]); } //in the previous loop, |lastOut| <= p*p*q lastOut = lastOut % q; hInt_t acc = (lastOut * reciprocalOfP) % q; // |acc| <= p*q for (i = p-2; i > 0; --i) { hDim_t idx = tensorOffset + i*rts; hInt_t tmp = acc; acc = acc - y[idx]; y[idx] = tmp % q; } y[tensorOffset] = acc % q; } } } void ppGInvDecR (void* y, PrimeExponent pe, hDim_t lts, hDim_t rts, hInt_t q) { #ifdef DEBUG_MODE ASSERT (q==0); #endif hDim_t p = pe.prime; hShort_t e = pe.exponent; if (p != 2) { gInvDecR ((hInt_t*)y, lts*ipow(p,e-1), rts, p); } } void ppGInvDecRq (void* y, PrimeExponent pe, hDim_t lts, hDim_t rts, hInt_t q) { hDim_t p = pe.prime; hShort_t e = pe.exponent; if (p != 2) { gInvDecRq ((hInt_t*)y, lts*ipow(p,e-1), rts, p, q); } } #ifdef STATS int gprCtr = 0; int gprqCtr = 0; int gdrCtr = 0; int gdrqCtr = 0; int giprCtr = 0; int giprqCtr = 0; int gidrCtr = 0; int gidrqCtr = 0; int gcrqCtr = 0; int gccCtr = 0; int gicrqCtr = 0; int giccCtr = 0; struct timespec gprTime = {0,0}; struct timespec gprqTime = {0,0}; struct timespec gdrTime = {0,0}; struct timespec gdrqTime = {0,0}; struct timespec giprTime = {0,0}; struct timespec giprqTime = {0,0}; struct timespec gidrTime = {0,0}; struct timespec gidrqTime = {0,0}; struct timespec gcrqTime = {0,0}; struct timespec gccTime = {0,0}; #endif void tensorGPowR (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE) { #ifdef STATS gprCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif tensorFuser (y, ppGPowR, totm, peArr, sizeOfPE, 0); #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); gprTime = tsAdd(gprTime, tsSubtract(t1,s1)); #endif } void tensorGPowRq (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE, hInt_t q) { #ifdef STATS gprqCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif tensorFuser (y, ppGPowRq, totm, peArr, sizeOfPE, q); hDim_t j; for(j = 0; j < totm; j++) { if(y[j]<0) { y[j]+=q; } } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); gprqTime = tsAdd(gprqTime, tsSubtract(t1,s1)); #endif } void tensorGDecR (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE) { #ifdef STATS gdrCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif tensorFuser (y, ppGDecR, totm, peArr, sizeOfPE, 0); #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); gdrTime = tsAdd(gdrTime, tsSubtract(t1,s1)); #endif } void tensorGDecRq (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE, hInt_t q) { #ifdef STATS gdrqCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif tensorFuser (y, ppGDecRq, totm, peArr, sizeOfPE, q); hDim_t j; for(j = 0; j < totm; j++) { if(y[j]<0) { y[j]+=q; } } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); gdrqTime = tsAdd(gdrqTime, tsSubtract(t1,s1)); #endif } void tensorGInvPowR (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE) { #ifdef STATS giprCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif tensorFuser (y, ppGInvPowR, totm, peArr, sizeOfPE, 0); #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); giprTime = tsAdd(giprTime, tsSubtract(t1,s1)); #endif } void tensorGInvPowRq (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE, hInt_t q) { #ifdef STATS giprqCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif tensorFuser (y, ppGInvPowRq, totm, peArr, sizeOfPE, q); hDim_t j; for(j = 0; j < totm; j++) { if(y[j]<0) { y[j]+=q; } } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); giprqTime = tsAdd(giprqTime, tsSubtract(t1,s1)); #endif } void tensorGInvDecR (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE) { #ifdef STATS gidrCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif tensorFuser (y, ppGInvDecR, totm, peArr, sizeOfPE, 0); #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); gidrTime = tsAdd(gidrTime, tsSubtract(t1,s1)); #endif } void tensorGInvDecRq (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE, hInt_t q) { #ifdef STATS gidrqCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif tensorFuser (y, ppGInvDecRq, totm, peArr, sizeOfPE, q); hDim_t j; for(j = 0; j < totm; j++) { if(y[j]<0) { y[j]+=q; } } #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); gidrqTime = tsAdd(gidrqTime, tsSubtract(t1,s1)); #endif } void tensorGCRTRq (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE, hInt_t** gcoeffs, hInt_t q) { #ifdef STATS gcrqCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif #ifdef DEBUG_MODE printf("\n\nEntered tensorGCRTRq\ttotm=%" PRId32 "\tnumFacts=%" PRId16 "\tq=%" PRId64 "\n[", totm, sizeOfPE, q); hDim_t j; for(j = 0; j < totm; j++) { printf("%" PRId64 ",", y[j]); } printf("]\n["); for(j = 0; j < sizeOfPE; j++) { printf("(%" PRId32 ",%" PRId16 "),", peArr[j].prime, peArr[j].exponent); } printf("]\n"); #endif void** vgcoeffs = (void**)malloc(sizeOfPE*sizeof(void*)); hDim_t i; for(i = 0; i < sizeOfPE; i++) { vgcoeffs[i] = (void*) (gcoeffs[i]); } tensorFuserCRT (y, ppGCRTRq, totm, peArr, sizeOfPE, vgcoeffs, q); #ifdef DEBUG_MODE for(j = 0; j < totm; j++) { if(y[j]<0) { printf("tensorGCRTRq\n"); } } #endif #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); gcrqTime = tsAdd(gcrqTime, tsSubtract(t1,s1)); #endif } void tensorGCRTC (complex_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE, complex_t** gcoeffs) { #ifdef STATS gccCtr++; struct timespec s1,t1; clock_gettime(CLOCK_REALTIME, &s1); #endif #ifdef DEBUG_MODE printf("\n\nEntered tensorGCRTC\ttotm=%" PRId32 "\tnumFacts=%" PRId16 "\n[", totm, sizeOfPE); hDim_t j; for(j = 0; j < totm; j++) { printf("(%f,%f),", (y[j]).real, (y[j]).imag); } printf("]\n["); for(j = 0; j < sizeOfPE; j++) { printf("(%" PRId32 ",%" PRId16 "),", peArr[j].prime, peArr[j].exponent); } printf("]\n"); #endif void** vgcoeffs = (void**)malloc(sizeOfPE*sizeof(void*)); hDim_t i; for(i = 0; i < sizeOfPE; i++) { vgcoeffs[i] = (void*) (gcoeffs[i]); } tensorFuserCRT (y, ppGCRTC, totm, peArr, sizeOfPE, vgcoeffs, 0); #ifdef STATS clock_gettime(CLOCK_REALTIME, &t1); gccTime = tsAdd(gccTime, tsSubtract(t1,s1)); #endif } void tensorGInvCRTRq (hInt_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE, hInt_t** gcoeffs, hInt_t q) { #ifdef STATS gicrqCtr++; #endif tensorGCRTRq (y, totm, peArr, sizeOfPE, gcoeffs, q); //output is already shifted } void tensorGInvCRTC (complex_t* y, hDim_t totm, PrimeExponent* peArr, hShort_t sizeOfPE, complex_t** gcoeffs) { #ifdef STATS giccCtr++; #endif tensorGCRTC (y, totm, peArr, sizeOfPE, gcoeffs); }