7 L2Sqr(
const void *pVect1v,
const void *pVect2v,
const void *qty_ptr) {
8 float *pVect1 = (
float *) pVect1v;
9 float *pVect2 = (
float *) pVect2v;
10 size_t qty = *((
size_t *) qty_ptr);
13 for (
size_t i = 0; i < qty; i++) {
14 float t = *pVect1 - *pVect2;
22#if defined(USE_AVX512)
26 L2SqrSIMD16ExtAVX512(
const void *pVect1v,
const void *pVect2v,
const void *qty_ptr) {
27 float *pVect1 = (
float *) pVect1v;
28 float *pVect2 = (
float *) pVect2v;
29 size_t qty = *((
size_t *) qty_ptr);
30 float PORTABLE_ALIGN64 TmpRes[16];
31 size_t qty16 = qty >> 4;
33 const float *pEnd1 = pVect1 + (qty16 << 4);
36 __m512 sum = _mm512_set1_ps(0);
38 while (pVect1 < pEnd1) {
39 v1 = _mm512_loadu_ps(pVect1);
41 v2 = _mm512_loadu_ps(pVect2);
43 diff = _mm512_sub_ps(v1, v2);
45 sum = _mm512_add_ps(sum, _mm512_mul_ps(diff, diff));
48 _mm512_store_ps(TmpRes, sum);
49 float res = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] +
50 TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] +
51 TmpRes[13] + TmpRes[14] + TmpRes[15];
61 L2SqrSIMD16ExtAVX(
const void *pVect1v,
const void *pVect2v,
const void *qty_ptr) {
62 float *pVect1 = (
float *) pVect1v;
63 float *pVect2 = (
float *) pVect2v;
64 size_t qty = *((
size_t *) qty_ptr);
65 float PORTABLE_ALIGN32 TmpRes[8];
66 size_t qty16 = qty >> 4;
68 const float *pEnd1 = pVect1 + (qty16 << 4);
71 __m256 sum = _mm256_set1_ps(0);
73 while (pVect1 < pEnd1) {
74 v1 = _mm256_loadu_ps(pVect1);
76 v2 = _mm256_loadu_ps(pVect2);
78 diff = _mm256_sub_ps(v1, v2);
79 sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
81 v1 = _mm256_loadu_ps(pVect1);
83 v2 = _mm256_loadu_ps(pVect2);
85 diff = _mm256_sub_ps(v1, v2);
86 sum = _mm256_add_ps(sum, _mm256_mul_ps(diff, diff));
89 _mm256_store_ps(TmpRes, sum);
90 return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
98 L2SqrSIMD16ExtSSE(
const void *pVect1v,
const void *pVect2v,
const void *qty_ptr) {
99 float *pVect1 = (
float *) pVect1v;
100 float *pVect2 = (
float *) pVect2v;
101 size_t qty = *((
size_t *) qty_ptr);
102 float PORTABLE_ALIGN32 TmpRes[8];
103 size_t qty16 = qty >> 4;
105 const float *pEnd1 = pVect1 + (qty16 << 4);
108 __m128 sum = _mm_set1_ps(0);
110 while (pVect1 < pEnd1) {
112 v1 = _mm_loadu_ps(pVect1);
114 v2 = _mm_loadu_ps(pVect2);
116 diff = _mm_sub_ps(v1, v2);
117 sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
119 v1 = _mm_loadu_ps(pVect1);
121 v2 = _mm_loadu_ps(pVect2);
123 diff = _mm_sub_ps(v1, v2);
124 sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
126 v1 = _mm_loadu_ps(pVect1);
128 v2 = _mm_loadu_ps(pVect2);
130 diff = _mm_sub_ps(v1, v2);
131 sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
133 v1 = _mm_loadu_ps(pVect1);
135 v2 = _mm_loadu_ps(pVect2);
137 diff = _mm_sub_ps(v1, v2);
138 sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
141 _mm_store_ps(TmpRes, sum);
142 return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
146#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
150 L2SqrSIMD16ExtResiduals(
const void *pVect1v,
const void *pVect2v,
const void *qty_ptr) {
151 size_t qty = *((
size_t *) qty_ptr);
152 size_t qty16 = qty >> 4 << 4;
153 float res = L2SqrSIMD16Ext(pVect1v, pVect2v, &qty16);
154 float *pVect1 = (
float *) pVect1v + qty16;
155 float *pVect2 = (
float *) pVect2v + qty16;
157 size_t qty_left = qty - qty16;
158 float res_tail =
L2Sqr(pVect1, pVect2, &qty_left);
159 return (res + res_tail);
166 L2SqrSIMD4Ext(
const void *pVect1v,
const void *pVect2v,
const void *qty_ptr) {
167 float PORTABLE_ALIGN32 TmpRes[8];
168 float *pVect1 = (
float *) pVect1v;
169 float *pVect2 = (
float *) pVect2v;
170 size_t qty = *((
size_t *) qty_ptr);
173 size_t qty4 = qty >> 2;
175 const float *pEnd1 = pVect1 + (qty4 << 2);
178 __m128 sum = _mm_set1_ps(0);
180 while (pVect1 < pEnd1) {
181 v1 = _mm_loadu_ps(pVect1);
183 v2 = _mm_loadu_ps(pVect2);
185 diff = _mm_sub_ps(v1, v2);
186 sum = _mm_add_ps(sum, _mm_mul_ps(diff, diff));
188 _mm_store_ps(TmpRes, sum);
189 return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
193 L2SqrSIMD4ExtResiduals(
const void *pVect1v,
const void *pVect2v,
const void *qty_ptr) {
194 size_t qty = *((
size_t *) qty_ptr);
195 size_t qty4 = qty >> 2 << 2;
197 float res = L2SqrSIMD4Ext(pVect1v, pVect2v, &qty4);
198 size_t qty_left = qty - qty4;
200 float *pVect1 = (
float *) pVect1v + qty4;
201 float *pVect2 = (
float *) pVect2v + qty4;
202 float res_tail =
L2Sqr(pVect1, pVect2, &qty_left);
204 return (res + res_tail);
216 #if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
217 #if defined(USE_AVX512)
219 L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX512;
220 else if (AVXCapable())
221 L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX;
222 #elif defined(USE_AVX)
224 L2SqrSIMD16Ext = L2SqrSIMD16ExtAVX;
229 else if (dim % 4 == 0)
256 L2SqrI4x(
const void *__restrict pVect1,
const void *__restrict pVect2,
const void *__restrict qty_ptr) {
258 size_t qty = *((
size_t *) qty_ptr);
260 unsigned char *a = (
unsigned char *) pVect1;
261 unsigned char *b = (
unsigned char *) pVect2;
264 for (
size_t i = 0; i < qty; i++) {
266 res += ((*a) - (*b)) * ((*a) - (*b));
269 res += ((*a) - (*b)) * ((*a) - (*b));
272 res += ((*a) - (*b)) * ((*a) - (*b));
275 res += ((*a) - (*b)) * ((*a) - (*b));
282 static int L2SqrI(
const void* __restrict pVect1,
const void* __restrict pVect2,
const void* __restrict qty_ptr) {
283 size_t qty = *((
size_t*)qty_ptr);
285 unsigned char* a = (
unsigned char*)pVect1;
286 unsigned char* b = (
unsigned char*)pVect2;
288 for(
size_t i = 0; i < qty; i++)
290 res += ((*a) - (*b)) * ((*a) - (*b));
Definition space_l2.h:208
DISTFUNC< float > get_dist_func()
Definition space_l2.h:244
size_t dim_
Definition space_l2.h:212
size_t get_data_size()
Definition space_l2.h:240
L2Space(size_t dim)
Definition space_l2.h:214
DISTFUNC< float > fstdistfunc_
Definition space_l2.h:210
size_t data_size_
Definition space_l2.h:211
void * get_dist_func_param()
Definition space_l2.h:248
~L2Space()
Definition space_l2.h:252
Definition space_l2.h:297
DISTFUNC< int > get_dist_func()
Definition space_l2.h:318
~L2SpaceI()
Definition space_l2.h:326
DISTFUNC< int > fstdistfunc_
Definition space_l2.h:299
size_t dim_
Definition space_l2.h:301
L2SpaceI(size_t dim)
Definition space_l2.h:303
void * get_dist_func_param()
Definition space_l2.h:322
size_t data_size_
Definition space_l2.h:300
size_t get_data_size()
Definition space_l2.h:314
Definition bruteforce.h:7
static float L2Sqr(const void *pVect1v, const void *pVect2v, const void *qty_ptr)
Definition space_l2.h:7
static int L2SqrI(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr)
Definition space_l2.h:282
static int L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr)
Definition space_l2.h:256
MTYPE(*)(const void *, const void *, const void *) DISTFUNC
Definition hnswlib.h:138