#include #include #include float distance_l2_float(float *p1, float *p2, int len) { float ret=0; int i=0; for (i=0; i dist2) return NAN; } return sqrt(ret); } double distance_l2_double(double *p1, double *p2, int len) { double ret=0; int i=0; for (i=0; i dist2) return NAN; } return sqrt(ret); } /******************************************************************************/ /* __m128 */ float distance_l2_m128(__m128 *p1, __m128 *p2, int len) { /*printf("distance_l2_m128; p1=%d; p2=%d; len=%d\n", ((unsigned int)p1%16), ((unsigned int)p2%16), len);*/ float ret=0; __m128 sum={0,0,0,0}; float fsum[4]; int i=0; for (i=0; i dist2) { return dist2; } } } _mm_store_ps(fsum,sum); ret = fsum[0] + fsum[1] + fsum[2] + fsum[3]; return sqrt(ret); } float distanceUB_l2_m128_blurp(__m128 *p1, __m128 *p2, int len, float dist) { /*printf("distance_l2_m128; p1=%d; p2=%d; len=%d\n", ((unsigned int)p1%16), ((unsigned int)p2%16), len);*/ float ret=0; float dist2=dist*dist; __m128 sum={0,0,0,0}; float fsum[4]; int i=0; for (i=0; i dist2/4) { return dist2; }*/ /* i++; diff = _mm_sub_ps(p1[i],p2[i]); diff = _mm_mul_ps(diff,diff); _mm_hadd_ps(sum */ /*_mm_store_ss(fsum,sum);*/ /*if (fsum[0] > dist2/4) {*/ _mm_store_ps(fsum,sum); float tmpsum=fsum[0]+fsum[1]+fsum[2]+fsum[3]; if (tmpsum > dist2) { return tmpsum; } /*}*/ } } _mm_store_ps(fsum,sum); ret = fsum[0] + fsum[1] + fsum[2] + fsum[3]; /* for (i*=4; i4&&i%4==3) {*/ if (i%4==1) { _mm_store_ss(fsum,sum); if (fsum[0] > dist2) { return fsum[0]; } /* i++; diff = _mm_sub_ps(p1[i],p2[i]); diff = _mm_mul_ps(diff,diff); _mm_hadd_ps(sum _mm_store_ss(fsum,sum); if (fsum[0] > dist2/4) { _mm_store_ps(fsum,sum); float tmpsum=fsum[0]+fsum[1]+fsum[2]+fsum[3]; if (tmpsum > dist2) { return tmpsum; } } */ } } _mm_store_ps(fsum,sum); ret = fsum[0] + fsum[1] + fsum[2] + fsum[3]; return sqrt(ret); } float isFartherThan_l2_m128(__m128 *p1, __m128 *p2, int len, float dist) { float ret=0; float dist2=dist*dist; __m128 sum={0,0,0,0}; float fsum[4]; int i=0; for (i=0; i dist2/4) { _mm_store_ps(fsum,sum); if (fsum[0]+fsum[1]+fsum[2]+fsum[3] > dist2) { return NAN; } } } } _mm_store_ps(fsum,sum); ret = fsum[0] + fsum[1] + fsum[2] + fsum[3]; for (i*=4; i dist2/4) { if (sum[0]+sum[1]+sum[2]+sum[3] > dist2) { return NAN; } } } ret = sum[0] + sum[1] + sum[2] + sum[3]; for (i*=4; i dist2) { return NAN; } return sqrt(ret); } float isFartherThan_l2_m128_nocheck(__m128 *p1, __m128 *p2, int len, float dist) { float ret=0; float dist2=dist*dist; __m128 sum={0,0,0,0}; int i=0; for (i=0; i dist2) { return NAN; } } } ret = fsum[0] + fsum[1]; for (i*=2; i