clang  3.7.0
emmintrin.h
Go to the documentation of this file.
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 
24 #ifndef __EMMINTRIN_H
25 #define __EMMINTRIN_H
26 
27 #ifndef __SSE2__
28 #error "SSE2 instruction set not enabled"
29 #else
30 
31 #include <xmmintrin.h>
32 
/* Public SSE2 vector types: 16 bytes viewed as doubles or as long longs. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Internal 16-byte element-typed views used for casts in this header. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));

/* Attributes applied to every function defined in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
44 
45 static __inline__ __m128d __DEFAULT_FN_ATTRS
46 _mm_add_sd(__m128d __a, __m128d __b)
47 {
48  __a[0] += __b[0];
49  return __a;
50 }
51 
52 static __inline__ __m128d __DEFAULT_FN_ATTRS
53 _mm_add_pd(__m128d __a, __m128d __b)
54 {
55  return __a + __b;
56 }
57 
58 static __inline__ __m128d __DEFAULT_FN_ATTRS
59 _mm_sub_sd(__m128d __a, __m128d __b)
60 {
61  __a[0] -= __b[0];
62  return __a;
63 }
64 
65 static __inline__ __m128d __DEFAULT_FN_ATTRS
66 _mm_sub_pd(__m128d __a, __m128d __b)
67 {
68  return __a - __b;
69 }
70 
71 static __inline__ __m128d __DEFAULT_FN_ATTRS
72 _mm_mul_sd(__m128d __a, __m128d __b)
73 {
74  __a[0] *= __b[0];
75  return __a;
76 }
77 
78 static __inline__ __m128d __DEFAULT_FN_ATTRS
79 _mm_mul_pd(__m128d __a, __m128d __b)
80 {
81  return __a * __b;
82 }
83 
84 static __inline__ __m128d __DEFAULT_FN_ATTRS
85 _mm_div_sd(__m128d __a, __m128d __b)
86 {
87  __a[0] /= __b[0];
88  return __a;
89 }
90 
91 static __inline__ __m128d __DEFAULT_FN_ATTRS
92 _mm_div_pd(__m128d __a, __m128d __b)
93 {
94  return __a / __b;
95 }
96 
97 static __inline__ __m128d __DEFAULT_FN_ATTRS
98 _mm_sqrt_sd(__m128d __a, __m128d __b)
99 {
100  __m128d __c = __builtin_ia32_sqrtsd(__b);
101  return (__m128d) { __c[0], __a[1] };
102 }
103 
104 static __inline__ __m128d __DEFAULT_FN_ATTRS
105 _mm_sqrt_pd(__m128d __a)
106 {
107  return __builtin_ia32_sqrtpd(__a);
108 }
109 
110 static __inline__ __m128d __DEFAULT_FN_ATTRS
111 _mm_min_sd(__m128d __a, __m128d __b)
112 {
113  return __builtin_ia32_minsd(__a, __b);
114 }
115 
116 static __inline__ __m128d __DEFAULT_FN_ATTRS
117 _mm_min_pd(__m128d __a, __m128d __b)
118 {
119  return __builtin_ia32_minpd(__a, __b);
120 }
121 
122 static __inline__ __m128d __DEFAULT_FN_ATTRS
123 _mm_max_sd(__m128d __a, __m128d __b)
124 {
125  return __builtin_ia32_maxsd(__a, __b);
126 }
127 
128 static __inline__ __m128d __DEFAULT_FN_ATTRS
129 _mm_max_pd(__m128d __a, __m128d __b)
130 {
131  return __builtin_ia32_maxpd(__a, __b);
132 }
133 
134 static __inline__ __m128d __DEFAULT_FN_ATTRS
135 _mm_and_pd(__m128d __a, __m128d __b)
136 {
137  return (__m128d)((__v4si)__a & (__v4si)__b);
138 }
139 
140 static __inline__ __m128d __DEFAULT_FN_ATTRS
141 _mm_andnot_pd(__m128d __a, __m128d __b)
142 {
143  return (__m128d)(~(__v4si)__a & (__v4si)__b);
144 }
145 
146 static __inline__ __m128d __DEFAULT_FN_ATTRS
147 _mm_or_pd(__m128d __a, __m128d __b)
148 {
149  return (__m128d)((__v4si)__a | (__v4si)__b);
150 }
151 
152 static __inline__ __m128d __DEFAULT_FN_ATTRS
153 _mm_xor_pd(__m128d __a, __m128d __b)
154 {
155  return (__m128d)((__v4si)__a ^ (__v4si)__b);
156 }
157 
158 static __inline__ __m128d __DEFAULT_FN_ATTRS
159 _mm_cmpeq_pd(__m128d __a, __m128d __b)
160 {
161  return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
162 }
163 
164 static __inline__ __m128d __DEFAULT_FN_ATTRS
165 _mm_cmplt_pd(__m128d __a, __m128d __b)
166 {
167  return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
168 }
169 
170 static __inline__ __m128d __DEFAULT_FN_ATTRS
171 _mm_cmple_pd(__m128d __a, __m128d __b)
172 {
173  return (__m128d)__builtin_ia32_cmplepd(__a, __b);
174 }
175 
176 static __inline__ __m128d __DEFAULT_FN_ATTRS
177 _mm_cmpgt_pd(__m128d __a, __m128d __b)
178 {
179  return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
180 }
181 
182 static __inline__ __m128d __DEFAULT_FN_ATTRS
183 _mm_cmpge_pd(__m128d __a, __m128d __b)
184 {
185  return (__m128d)__builtin_ia32_cmplepd(__b, __a);
186 }
187 
188 static __inline__ __m128d __DEFAULT_FN_ATTRS
189 _mm_cmpord_pd(__m128d __a, __m128d __b)
190 {
191  return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
192 }
193 
194 static __inline__ __m128d __DEFAULT_FN_ATTRS
195 _mm_cmpunord_pd(__m128d __a, __m128d __b)
196 {
197  return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
198 }
199 
200 static __inline__ __m128d __DEFAULT_FN_ATTRS
201 _mm_cmpneq_pd(__m128d __a, __m128d __b)
202 {
203  return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
204 }
205 
206 static __inline__ __m128d __DEFAULT_FN_ATTRS
207 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
208 {
209  return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
210 }
211 
212 static __inline__ __m128d __DEFAULT_FN_ATTRS
213 _mm_cmpnle_pd(__m128d __a, __m128d __b)
214 {
215  return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
216 }
217 
218 static __inline__ __m128d __DEFAULT_FN_ATTRS
219 _mm_cmpngt_pd(__m128d __a, __m128d __b)
220 {
221  return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
222 }
223 
224 static __inline__ __m128d __DEFAULT_FN_ATTRS
225 _mm_cmpnge_pd(__m128d __a, __m128d __b)
226 {
227  return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
228 }
229 
230 static __inline__ __m128d __DEFAULT_FN_ATTRS
231 _mm_cmpeq_sd(__m128d __a, __m128d __b)
232 {
233  return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
234 }
235 
236 static __inline__ __m128d __DEFAULT_FN_ATTRS
237 _mm_cmplt_sd(__m128d __a, __m128d __b)
238 {
239  return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
240 }
241 
242 static __inline__ __m128d __DEFAULT_FN_ATTRS
243 _mm_cmple_sd(__m128d __a, __m128d __b)
244 {
245  return (__m128d)__builtin_ia32_cmplesd(__a, __b);
246 }
247 
248 static __inline__ __m128d __DEFAULT_FN_ATTRS
249 _mm_cmpgt_sd(__m128d __a, __m128d __b)
250 {
251  __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
252  return (__m128d) { __c[0], __a[1] };
253 }
254 
255 static __inline__ __m128d __DEFAULT_FN_ATTRS
256 _mm_cmpge_sd(__m128d __a, __m128d __b)
257 {
258  __m128d __c = __builtin_ia32_cmplesd(__b, __a);
259  return (__m128d) { __c[0], __a[1] };
260 }
261 
262 static __inline__ __m128d __DEFAULT_FN_ATTRS
263 _mm_cmpord_sd(__m128d __a, __m128d __b)
264 {
265  return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
266 }
267 
268 static __inline__ __m128d __DEFAULT_FN_ATTRS
269 _mm_cmpunord_sd(__m128d __a, __m128d __b)
270 {
271  return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
272 }
273 
274 static __inline__ __m128d __DEFAULT_FN_ATTRS
275 _mm_cmpneq_sd(__m128d __a, __m128d __b)
276 {
277  return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
278 }
279 
280 static __inline__ __m128d __DEFAULT_FN_ATTRS
281 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
282 {
283  return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
284 }
285 
286 static __inline__ __m128d __DEFAULT_FN_ATTRS
287 _mm_cmpnle_sd(__m128d __a, __m128d __b)
288 {
289  return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
290 }
291 
292 static __inline__ __m128d __DEFAULT_FN_ATTRS
293 _mm_cmpngt_sd(__m128d __a, __m128d __b)
294 {
295  __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
296  return (__m128d) { __c[0], __a[1] };
297 }
298 
299 static __inline__ __m128d __DEFAULT_FN_ATTRS
300 _mm_cmpnge_sd(__m128d __a, __m128d __b)
301 {
302  __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
303  return (__m128d) { __c[0], __a[1] };
304 }
305 
306 static __inline__ int __DEFAULT_FN_ATTRS
307 _mm_comieq_sd(__m128d __a, __m128d __b)
308 {
309  return __builtin_ia32_comisdeq(__a, __b);
310 }
311 
312 static __inline__ int __DEFAULT_FN_ATTRS
313 _mm_comilt_sd(__m128d __a, __m128d __b)
314 {
315  return __builtin_ia32_comisdlt(__a, __b);
316 }
317 
318 static __inline__ int __DEFAULT_FN_ATTRS
319 _mm_comile_sd(__m128d __a, __m128d __b)
320 {
321  return __builtin_ia32_comisdle(__a, __b);
322 }
323 
324 static __inline__ int __DEFAULT_FN_ATTRS
325 _mm_comigt_sd(__m128d __a, __m128d __b)
326 {
327  return __builtin_ia32_comisdgt(__a, __b);
328 }
329 
330 static __inline__ int __DEFAULT_FN_ATTRS
331 _mm_comige_sd(__m128d __a, __m128d __b)
332 {
333  return __builtin_ia32_comisdge(__a, __b);
334 }
335 
336 static __inline__ int __DEFAULT_FN_ATTRS
337 _mm_comineq_sd(__m128d __a, __m128d __b)
338 {
339  return __builtin_ia32_comisdneq(__a, __b);
340 }
341 
342 static __inline__ int __DEFAULT_FN_ATTRS
343 _mm_ucomieq_sd(__m128d __a, __m128d __b)
344 {
345  return __builtin_ia32_ucomisdeq(__a, __b);
346 }
347 
348 static __inline__ int __DEFAULT_FN_ATTRS
349 _mm_ucomilt_sd(__m128d __a, __m128d __b)
350 {
351  return __builtin_ia32_ucomisdlt(__a, __b);
352 }
353 
354 static __inline__ int __DEFAULT_FN_ATTRS
355 _mm_ucomile_sd(__m128d __a, __m128d __b)
356 {
357  return __builtin_ia32_ucomisdle(__a, __b);
358 }
359 
360 static __inline__ int __DEFAULT_FN_ATTRS
361 _mm_ucomigt_sd(__m128d __a, __m128d __b)
362 {
363  return __builtin_ia32_ucomisdgt(__a, __b);
364 }
365 
366 static __inline__ int __DEFAULT_FN_ATTRS
367 _mm_ucomige_sd(__m128d __a, __m128d __b)
368 {
369  return __builtin_ia32_ucomisdge(__a, __b);
370 }
371 
372 static __inline__ int __DEFAULT_FN_ATTRS
373 _mm_ucomineq_sd(__m128d __a, __m128d __b)
374 {
375  return __builtin_ia32_ucomisdneq(__a, __b);
376 }
377 
378 static __inline__ __m128 __DEFAULT_FN_ATTRS
379 _mm_cvtpd_ps(__m128d __a)
380 {
381  return __builtin_ia32_cvtpd2ps(__a);
382 }
383 
384 static __inline__ __m128d __DEFAULT_FN_ATTRS
385 _mm_cvtps_pd(__m128 __a)
386 {
387  return __builtin_ia32_cvtps2pd(__a);
388 }
389 
390 static __inline__ __m128d __DEFAULT_FN_ATTRS
391 _mm_cvtepi32_pd(__m128i __a)
392 {
393  return __builtin_ia32_cvtdq2pd((__v4si)__a);
394 }
395 
396 static __inline__ __m128i __DEFAULT_FN_ATTRS
397 _mm_cvtpd_epi32(__m128d __a)
398 {
399  return __builtin_ia32_cvtpd2dq(__a);
400 }
401 
402 static __inline__ int __DEFAULT_FN_ATTRS
403 _mm_cvtsd_si32(__m128d __a)
404 {
405  return __builtin_ia32_cvtsd2si(__a);
406 }
407 
408 static __inline__ __m128 __DEFAULT_FN_ATTRS
409 _mm_cvtsd_ss(__m128 __a, __m128d __b)
410 {
411  __a[0] = __b[0];
412  return __a;
413 }
414 
415 static __inline__ __m128d __DEFAULT_FN_ATTRS
416 _mm_cvtsi32_sd(__m128d __a, int __b)
417 {
418  __a[0] = __b;
419  return __a;
420 }
421 
422 static __inline__ __m128d __DEFAULT_FN_ATTRS
423 _mm_cvtss_sd(__m128d __a, __m128 __b)
424 {
425  __a[0] = __b[0];
426  return __a;
427 }
428 
429 static __inline__ __m128i __DEFAULT_FN_ATTRS
430 _mm_cvttpd_epi32(__m128d __a)
431 {
432  return (__m128i)__builtin_ia32_cvttpd2dq(__a);
433 }
434 
435 static __inline__ int __DEFAULT_FN_ATTRS
436 _mm_cvttsd_si32(__m128d __a)
437 {
438  return __a[0];
439 }
440 
441 static __inline__ __m64 __DEFAULT_FN_ATTRS
442 _mm_cvtpd_pi32(__m128d __a)
443 {
444  return (__m64)__builtin_ia32_cvtpd2pi(__a);
445 }
446 
447 static __inline__ __m64 __DEFAULT_FN_ATTRS
448 _mm_cvttpd_pi32(__m128d __a)
449 {
450  return (__m64)__builtin_ia32_cvttpd2pi(__a);
451 }
452 
453 static __inline__ __m128d __DEFAULT_FN_ATTRS
454 _mm_cvtpi32_pd(__m64 __a)
455 {
456  return __builtin_ia32_cvtpi2pd((__v2si)__a);
457 }
458 
459 static __inline__ double __DEFAULT_FN_ATTRS
460 _mm_cvtsd_f64(__m128d __a)
461 {
462  return __a[0];
463 }
464 
465 static __inline__ __m128d __DEFAULT_FN_ATTRS
466 _mm_load_pd(double const *__dp)
467 {
468  return *(__m128d*)__dp;
469 }
470 
471 static __inline__ __m128d __DEFAULT_FN_ATTRS
472 _mm_load1_pd(double const *__dp)
473 {
474  struct __mm_load1_pd_struct {
475  double __u;
476  } __attribute__((__packed__, __may_alias__));
477  double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
478  return (__m128d){ __u, __u };
479 }
480 
/* Alias: _mm_load_pd1 expands to _mm_load1_pd with the same argument. */
#define _mm_load_pd1(dp) _mm_load1_pd((dp))
482 
483 static __inline__ __m128d __DEFAULT_FN_ATTRS
484 _mm_loadr_pd(double const *__dp)
485 {
486  __m128d __u = *(__m128d*)__dp;
487  return __builtin_shufflevector(__u, __u, 1, 0);
488 }
489 
490 static __inline__ __m128d __DEFAULT_FN_ATTRS
491 _mm_loadu_pd(double const *__dp)
492 {
493  struct __loadu_pd {
494  __m128d __v;
495  } __attribute__((__packed__, __may_alias__));
496  return ((struct __loadu_pd*)__dp)->__v;
497 }
498 
499 static __inline__ __m128d __DEFAULT_FN_ATTRS
500 _mm_load_sd(double const *__dp)
501 {
502  struct __mm_load_sd_struct {
503  double __u;
504  } __attribute__((__packed__, __may_alias__));
505  double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
506  return (__m128d){ __u, 0 };
507 }
508 
509 static __inline__ __m128d __DEFAULT_FN_ATTRS
510 _mm_loadh_pd(__m128d __a, double const *__dp)
511 {
512  struct __mm_loadh_pd_struct {
513  double __u;
514  } __attribute__((__packed__, __may_alias__));
515  double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
516  return (__m128d){ __a[0], __u };
517 }
518 
519 static __inline__ __m128d __DEFAULT_FN_ATTRS
520 _mm_loadl_pd(__m128d __a, double const *__dp)
521 {
522  struct __mm_loadl_pd_struct {
523  double __u;
524  } __attribute__((__packed__, __may_alias__));
525  double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
526  return (__m128d){ __u, __a[1] };
527 }
528 
529 static __inline__ __m128d __DEFAULT_FN_ATTRS
530 _mm_set_sd(double __w)
531 {
532  return (__m128d){ __w, 0 };
533 }
534 
535 static __inline__ __m128d __DEFAULT_FN_ATTRS
536 _mm_set1_pd(double __w)
537 {
538  return (__m128d){ __w, __w };
539 }
540 
541 static __inline__ __m128d __DEFAULT_FN_ATTRS
542 _mm_set_pd(double __w, double __x)
543 {
544  return (__m128d){ __x, __w };
545 }
546 
547 static __inline__ __m128d __DEFAULT_FN_ATTRS
548 _mm_setr_pd(double __w, double __x)
549 {
550  return (__m128d){ __w, __x };
551 }
552 
553 static __inline__ __m128d __DEFAULT_FN_ATTRS
554 _mm_setzero_pd(void)
555 {
556  return (__m128d){ 0, 0 };
557 }
558 
559 static __inline__ __m128d __DEFAULT_FN_ATTRS
560 _mm_move_sd(__m128d __a, __m128d __b)
561 {
562  return (__m128d){ __b[0], __a[1] };
563 }
564 
565 static __inline__ void __DEFAULT_FN_ATTRS
566 _mm_store_sd(double *__dp, __m128d __a)
567 {
568  struct __mm_store_sd_struct {
569  double __u;
570  } __attribute__((__packed__, __may_alias__));
571  ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
572 }
573 
574 static __inline__ void __DEFAULT_FN_ATTRS
575 _mm_store1_pd(double *__dp, __m128d __a)
576 {
577  struct __mm_store1_pd_struct {
578  double __u[2];
579  } __attribute__((__packed__, __may_alias__));
580  ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
581  ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
582 }
583 
584 static __inline__ void __DEFAULT_FN_ATTRS
585 _mm_store_pd(double *__dp, __m128d __a)
586 {
587  *(__m128d *)__dp = __a;
588 }
589 
590 static __inline__ void __DEFAULT_FN_ATTRS
591 _mm_storeu_pd(double *__dp, __m128d __a)
592 {
593  __builtin_ia32_storeupd(__dp, __a);
594 }
595 
596 static __inline__ void __DEFAULT_FN_ATTRS
597 _mm_storer_pd(double *__dp, __m128d __a)
598 {
599  __a = __builtin_shufflevector(__a, __a, 1, 0);
600  *(__m128d *)__dp = __a;
601 }
602 
603 static __inline__ void __DEFAULT_FN_ATTRS
604 _mm_storeh_pd(double *__dp, __m128d __a)
605 {
606  struct __mm_storeh_pd_struct {
607  double __u;
608  } __attribute__((__packed__, __may_alias__));
609  ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
610 }
611 
612 static __inline__ void __DEFAULT_FN_ATTRS
613 _mm_storel_pd(double *__dp, __m128d __a)
614 {
615  struct __mm_storeh_pd_struct {
616  double __u;
617  } __attribute__((__packed__, __may_alias__));
618  ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
619 }
620 
621 static __inline__ __m128i __DEFAULT_FN_ATTRS
622 _mm_add_epi8(__m128i __a, __m128i __b)
623 {
624  return (__m128i)((__v16qi)__a + (__v16qi)__b);
625 }
626 
627 static __inline__ __m128i __DEFAULT_FN_ATTRS
628 _mm_add_epi16(__m128i __a, __m128i __b)
629 {
630  return (__m128i)((__v8hi)__a + (__v8hi)__b);
631 }
632 
633 static __inline__ __m128i __DEFAULT_FN_ATTRS
634 _mm_add_epi32(__m128i __a, __m128i __b)
635 {
636  return (__m128i)((__v4si)__a + (__v4si)__b);
637 }
638 
639 static __inline__ __m64 __DEFAULT_FN_ATTRS
640 _mm_add_si64(__m64 __a, __m64 __b)
641 {
642  return __a + __b;
643 }
644 
645 static __inline__ __m128i __DEFAULT_FN_ATTRS
646 _mm_add_epi64(__m128i __a, __m128i __b)
647 {
648  return __a + __b;
649 }
650 
651 static __inline__ __m128i __DEFAULT_FN_ATTRS
652 _mm_adds_epi8(__m128i __a, __m128i __b)
653 {
654  return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
655 }
656 
657 static __inline__ __m128i __DEFAULT_FN_ATTRS
658 _mm_adds_epi16(__m128i __a, __m128i __b)
659 {
660  return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
661 }
662 
663 static __inline__ __m128i __DEFAULT_FN_ATTRS
664 _mm_adds_epu8(__m128i __a, __m128i __b)
665 {
666  return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
667 }
668 
669 static __inline__ __m128i __DEFAULT_FN_ATTRS
670 _mm_adds_epu16(__m128i __a, __m128i __b)
671 {
672  return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
673 }
674 
675 static __inline__ __m128i __DEFAULT_FN_ATTRS
676 _mm_avg_epu8(__m128i __a, __m128i __b)
677 {
678  return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
679 }
680 
681 static __inline__ __m128i __DEFAULT_FN_ATTRS
682 _mm_avg_epu16(__m128i __a, __m128i __b)
683 {
684  return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
685 }
686 
687 static __inline__ __m128i __DEFAULT_FN_ATTRS
688 _mm_madd_epi16(__m128i __a, __m128i __b)
689 {
690  return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
691 }
692 
693 static __inline__ __m128i __DEFAULT_FN_ATTRS
694 _mm_max_epi16(__m128i __a, __m128i __b)
695 {
696  return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
697 }
698 
699 static __inline__ __m128i __DEFAULT_FN_ATTRS
700 _mm_max_epu8(__m128i __a, __m128i __b)
701 {
702  return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
703 }
704 
705 static __inline__ __m128i __DEFAULT_FN_ATTRS
706 _mm_min_epi16(__m128i __a, __m128i __b)
707 {
708  return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
709 }
710 
711 static __inline__ __m128i __DEFAULT_FN_ATTRS
712 _mm_min_epu8(__m128i __a, __m128i __b)
713 {
714  return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
715 }
716 
717 static __inline__ __m128i __DEFAULT_FN_ATTRS
718 _mm_mulhi_epi16(__m128i __a, __m128i __b)
719 {
720  return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
721 }
722 
723 static __inline__ __m128i __DEFAULT_FN_ATTRS
724 _mm_mulhi_epu16(__m128i __a, __m128i __b)
725 {
726  return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
727 }
728 
729 static __inline__ __m128i __DEFAULT_FN_ATTRS
730 _mm_mullo_epi16(__m128i __a, __m128i __b)
731 {
732  return (__m128i)((__v8hi)__a * (__v8hi)__b);
733 }
734 
735 static __inline__ __m64 __DEFAULT_FN_ATTRS
736 _mm_mul_su32(__m64 __a, __m64 __b)
737 {
738  return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
739 }
740 
741 static __inline__ __m128i __DEFAULT_FN_ATTRS
742 _mm_mul_epu32(__m128i __a, __m128i __b)
743 {
744  return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
745 }
746 
747 static __inline__ __m128i __DEFAULT_FN_ATTRS
748 _mm_sad_epu8(__m128i __a, __m128i __b)
749 {
750  return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
751 }
752 
753 static __inline__ __m128i __DEFAULT_FN_ATTRS
754 _mm_sub_epi8(__m128i __a, __m128i __b)
755 {
756  return (__m128i)((__v16qi)__a - (__v16qi)__b);
757 }
758 
759 static __inline__ __m128i __DEFAULT_FN_ATTRS
760 _mm_sub_epi16(__m128i __a, __m128i __b)
761 {
762  return (__m128i)((__v8hi)__a - (__v8hi)__b);
763 }
764 
765 static __inline__ __m128i __DEFAULT_FN_ATTRS
766 _mm_sub_epi32(__m128i __a, __m128i __b)
767 {
768  return (__m128i)((__v4si)__a - (__v4si)__b);
769 }
770 
771 static __inline__ __m64 __DEFAULT_FN_ATTRS
772 _mm_sub_si64(__m64 __a, __m64 __b)
773 {
774  return __a - __b;
775 }
776 
777 static __inline__ __m128i __DEFAULT_FN_ATTRS
778 _mm_sub_epi64(__m128i __a, __m128i __b)
779 {
780  return __a - __b;
781 }
782 
783 static __inline__ __m128i __DEFAULT_FN_ATTRS
784 _mm_subs_epi8(__m128i __a, __m128i __b)
785 {
786  return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
787 }
788 
789 static __inline__ __m128i __DEFAULT_FN_ATTRS
790 _mm_subs_epi16(__m128i __a, __m128i __b)
791 {
792  return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
793 }
794 
795 static __inline__ __m128i __DEFAULT_FN_ATTRS
796 _mm_subs_epu8(__m128i __a, __m128i __b)
797 {
798  return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
799 }
800 
801 static __inline__ __m128i __DEFAULT_FN_ATTRS
802 _mm_subs_epu16(__m128i __a, __m128i __b)
803 {
804  return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
805 }
806 
807 static __inline__ __m128i __DEFAULT_FN_ATTRS
808 _mm_and_si128(__m128i __a, __m128i __b)
809 {
810  return __a & __b;
811 }
812 
813 static __inline__ __m128i __DEFAULT_FN_ATTRS
814 _mm_andnot_si128(__m128i __a, __m128i __b)
815 {
816  return ~__a & __b;
817 }
818 
819 static __inline__ __m128i __DEFAULT_FN_ATTRS
820 _mm_or_si128(__m128i __a, __m128i __b)
821 {
822  return __a | __b;
823 }
824 
825 static __inline__ __m128i __DEFAULT_FN_ATTRS
826 _mm_xor_si128(__m128i __a, __m128i __b)
827 {
828  return __a ^ __b;
829 }
830 
/* Byte shuffle of a zero vector (shuffle indices 0-15) followed by `a`
   (indices 16-31). Output byte k takes index 16 + k - (imm & 0xF); when any
   bit of imm's 0xF0 nibble is set, every output byte takes index 0, i.e. a
   zero byte. `imm` must be a constant expression. */
#define _mm_slli_si128(a, imm) __extension__ ({                        \
  (__m128i)__builtin_shufflevector(                                    \
      (__v16qi)_mm_setzero_si128(),                                    \
      (__v16qi)(__m128i)(a),                                           \
      ((imm)&0xF0) ? 0 : (16 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (17 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (18 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (19 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (20 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (21 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (22 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (23 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (24 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (25 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (26 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (27 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (28 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (29 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (30 - ((imm)&0xF)),                           \
      ((imm)&0xF0) ? 0 : (31 - ((imm)&0xF))); })
850 
/* Alternate spelling of _mm_slli_si128; forwards both arguments unchanged. */
#define _mm_bslli_si128(a, imm) _mm_slli_si128((a), (imm))
853 
854 static __inline__ __m128i __DEFAULT_FN_ATTRS
855 _mm_slli_epi16(__m128i __a, int __count)
856 {
857  return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
858 }
859 
860 static __inline__ __m128i __DEFAULT_FN_ATTRS
861 _mm_sll_epi16(__m128i __a, __m128i __count)
862 {
863  return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
864 }
865 
866 static __inline__ __m128i __DEFAULT_FN_ATTRS
867 _mm_slli_epi32(__m128i __a, int __count)
868 {
869  return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
870 }
871 
872 static __inline__ __m128i __DEFAULT_FN_ATTRS
873 _mm_sll_epi32(__m128i __a, __m128i __count)
874 {
875  return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
876 }
877 
878 static __inline__ __m128i __DEFAULT_FN_ATTRS
879 _mm_slli_epi64(__m128i __a, int __count)
880 {
881  return __builtin_ia32_psllqi128(__a, __count);
882 }
883 
884 static __inline__ __m128i __DEFAULT_FN_ATTRS
885 _mm_sll_epi64(__m128i __a, __m128i __count)
886 {
887  return __builtin_ia32_psllq128(__a, __count);
888 }
889 
890 static __inline__ __m128i __DEFAULT_FN_ATTRS
891 _mm_srai_epi16(__m128i __a, int __count)
892 {
893  return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
894 }
895 
896 static __inline__ __m128i __DEFAULT_FN_ATTRS
897 _mm_sra_epi16(__m128i __a, __m128i __count)
898 {
899  return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
900 }
901 
902 static __inline__ __m128i __DEFAULT_FN_ATTRS
903 _mm_srai_epi32(__m128i __a, int __count)
904 {
905  return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
906 }
907 
908 static __inline__ __m128i __DEFAULT_FN_ATTRS
909 _mm_sra_epi32(__m128i __a, __m128i __count)
910 {
911  return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
912 }
913 
/* Byte shuffle of `a` (shuffle indices 0-15) followed by a zero vector
   (indices 16-31). Output byte k takes index (imm & 0xF) + k; when any bit of
   imm's 0xF0 nibble is set, every output byte takes index 16, i.e. a zero
   byte. `imm` must be a constant expression. */
#define _mm_srli_si128(a, imm) __extension__ ({                        \
  (__m128i)__builtin_shufflevector(                                    \
      (__v16qi)(__m128i)(a),                                           \
      (__v16qi)_mm_setzero_si128(),                                    \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 0),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 1),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 2),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 3),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 4),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 5),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 6),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 7),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 8),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 9),                           \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 10),                          \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 11),                          \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 12),                          \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 13),                          \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 14),                          \
      ((imm)&0xF0) ? 16 : (((imm)&0xF) + 15)); })
933 
/* Alternate spelling of _mm_srli_si128; forwards both arguments unchanged. */
#define _mm_bsrli_si128(a, imm) _mm_srli_si128((a), (imm))
936 
937 static __inline__ __m128i __DEFAULT_FN_ATTRS
938 _mm_srli_epi16(__m128i __a, int __count)
939 {
940  return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
941 }
942 
943 static __inline__ __m128i __DEFAULT_FN_ATTRS
944 _mm_srl_epi16(__m128i __a, __m128i __count)
945 {
946  return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
947 }
948 
949 static __inline__ __m128i __DEFAULT_FN_ATTRS
950 _mm_srli_epi32(__m128i __a, int __count)
951 {
952  return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
953 }
954 
955 static __inline__ __m128i __DEFAULT_FN_ATTRS
956 _mm_srl_epi32(__m128i __a, __m128i __count)
957 {
958  return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
959 }
960 
961 static __inline__ __m128i __DEFAULT_FN_ATTRS
962 _mm_srli_epi64(__m128i __a, int __count)
963 {
964  return __builtin_ia32_psrlqi128(__a, __count);
965 }
966 
967 static __inline__ __m128i __DEFAULT_FN_ATTRS
968 _mm_srl_epi64(__m128i __a, __m128i __count)
969 {
970  return __builtin_ia32_psrlq128(__a, __count);
971 }
972 
973 static __inline__ __m128i __DEFAULT_FN_ATTRS
974 _mm_cmpeq_epi8(__m128i __a, __m128i __b)
975 {
976  return (__m128i)((__v16qi)__a == (__v16qi)__b);
977 }
978 
979 static __inline__ __m128i __DEFAULT_FN_ATTRS
980 _mm_cmpeq_epi16(__m128i __a, __m128i __b)
981 {
982  return (__m128i)((__v8hi)__a == (__v8hi)__b);
983 }
984 
985 static __inline__ __m128i __DEFAULT_FN_ATTRS
986 _mm_cmpeq_epi32(__m128i __a, __m128i __b)
987 {
988  return (__m128i)((__v4si)__a == (__v4si)__b);
989 }
990 
991 static __inline__ __m128i __DEFAULT_FN_ATTRS
992 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
993 {
994  /* This function always performs a signed comparison, but __v16qi is a char
995  which may be signed or unsigned. */
996  typedef signed char __v16qs __attribute__((__vector_size__(16)));
997  return (__m128i)((__v16qs)__a > (__v16qs)__b);
998 }
999 
1000 static __inline__ __m128i __DEFAULT_FN_ATTRS
1001 _mm_cmpgt_epi16(__m128i __a, __m128i __b)
1002 {
1003  return (__m128i)((__v8hi)__a > (__v8hi)__b);
1004 }
1005 
1006 static __inline__ __m128i __DEFAULT_FN_ATTRS
1007 _mm_cmpgt_epi32(__m128i __a, __m128i __b)
1008 {
1009  return (__m128i)((__v4si)__a > (__v4si)__b);
1010 }
1011 
1012 static __inline__ __m128i __DEFAULT_FN_ATTRS
1013 _mm_cmplt_epi8(__m128i __a, __m128i __b)
1014 {
1015  return _mm_cmpgt_epi8(__b, __a);
1016 }
1017 
1018 static __inline__ __m128i __DEFAULT_FN_ATTRS
1019 _mm_cmplt_epi16(__m128i __a, __m128i __b)
1020 {
1021  return _mm_cmpgt_epi16(__b, __a);
1022 }
1023 
1024 static __inline__ __m128i __DEFAULT_FN_ATTRS
1025 _mm_cmplt_epi32(__m128i __a, __m128i __b)
1026 {
1027  return _mm_cmpgt_epi32(__b, __a);
1028 }
1029 
1030 #ifdef __x86_64__
1031 static __inline__ __m128d __DEFAULT_FN_ATTRS
1032 _mm_cvtsi64_sd(__m128d __a, long long __b)
1033 {
1034  __a[0] = __b;
1035  return __a;
1036 }
1037 
1038 static __inline__ long long __DEFAULT_FN_ATTRS
1039 _mm_cvtsd_si64(__m128d __a)
1040 {
1041  return __builtin_ia32_cvtsd2si64(__a);
1042 }
1043 
1044 static __inline__ long long __DEFAULT_FN_ATTRS
1045 _mm_cvttsd_si64(__m128d __a)
1046 {
1047  return __a[0];
1048 }
1049 #endif
1050 
1051 static __inline__ __m128 __DEFAULT_FN_ATTRS
1052 _mm_cvtepi32_ps(__m128i __a)
1053 {
1054  return __builtin_ia32_cvtdq2ps((__v4si)__a);
1055 }
1056 
1057 static __inline__ __m128i __DEFAULT_FN_ATTRS
1058 _mm_cvtps_epi32(__m128 __a)
1059 {
1060  return (__m128i)__builtin_ia32_cvtps2dq(__a);
1061 }
1062 
1063 static __inline__ __m128i __DEFAULT_FN_ATTRS
1064 _mm_cvttps_epi32(__m128 __a)
1065 {
1066  return (__m128i)__builtin_ia32_cvttps2dq(__a);
1067 }
1068 
1069 static __inline__ __m128i __DEFAULT_FN_ATTRS
1070 _mm_cvtsi32_si128(int __a)
1071 {
1072  return (__m128i)(__v4si){ __a, 0, 0, 0 };
1073 }
1074 
1075 #ifdef __x86_64__
1076 static __inline__ __m128i __DEFAULT_FN_ATTRS
1077 _mm_cvtsi64_si128(long long __a)
1078 {
1079  return (__m128i){ __a, 0 };
1080 }
1081 #endif
1082 
1083 static __inline__ int __DEFAULT_FN_ATTRS
1084 _mm_cvtsi128_si32(__m128i __a)
1085 {
1086  __v4si __b = (__v4si)__a;
1087  return __b[0];
1088 }
1089 
1090 #ifdef __x86_64__
1091 static __inline__ long long __DEFAULT_FN_ATTRS
1092 _mm_cvtsi128_si64(__m128i __a)
1093 {
1094  return __a[0];
1095 }
1096 #endif
1097 
1098 static __inline__ __m128i __DEFAULT_FN_ATTRS
1099 _mm_load_si128(__m128i const *__p)
1100 {
1101  return *__p;
1102 }
1103 
1104 static __inline__ __m128i __DEFAULT_FN_ATTRS
1105 _mm_loadu_si128(__m128i const *__p)
1106 {
1107  struct __loadu_si128 {
1108  __m128i __v;
1109  } __attribute__((__packed__, __may_alias__));
1110  return ((struct __loadu_si128*)__p)->__v;
1111 }
1112 
1113 static __inline__ __m128i __DEFAULT_FN_ATTRS
1114 _mm_loadl_epi64(__m128i const *__p)
1115 {
1116  struct __mm_loadl_epi64_struct {
1117  long long __u;
1118  } __attribute__((__packed__, __may_alias__));
1119  return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
1120 }
1121 
1122 static __inline__ __m128i __DEFAULT_FN_ATTRS
1123 _mm_set_epi64x(long long q1, long long q0)
1124 {
1125  return (__m128i){ q0, q1 };
1126 }
1127 
1128 static __inline__ __m128i __DEFAULT_FN_ATTRS
1129 _mm_set_epi64(__m64 q1, __m64 q0)
1130 {
1131  return (__m128i){ (long long)q0, (long long)q1 };
1132 }
1133 
1134 static __inline__ __m128i __DEFAULT_FN_ATTRS
1135 _mm_set_epi32(int i3, int i2, int i1, int i0)
1136 {
1137  return (__m128i)(__v4si){ i0, i1, i2, i3};
1138 }
1139 
1140 static __inline__ __m128i __DEFAULT_FN_ATTRS
1141 _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
1142 {
1143  return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1144 }
1145 
1146 static __inline__ __m128i __DEFAULT_FN_ATTRS
1147 _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
1148 {
1149  return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1150 }
1151 
1152 static __inline__ __m128i __DEFAULT_FN_ATTRS
1153 _mm_set1_epi64x(long long __q)
1154 {
1155  return (__m128i){ __q, __q };
1156 }
1157 
1158 static __inline__ __m128i __DEFAULT_FN_ATTRS
1159 _mm_set1_epi64(__m64 __q)
1160 {
1161  return (__m128i){ (long long)__q, (long long)__q };
1162 }
1163 
1164 static __inline__ __m128i __DEFAULT_FN_ATTRS
1165 _mm_set1_epi32(int __i)
1166 {
1167  return (__m128i)(__v4si){ __i, __i, __i, __i };
1168 }
1169 
1170 static __inline__ __m128i __DEFAULT_FN_ATTRS
1171 _mm_set1_epi16(short __w)
1172 {
1173  return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
1174 }
1175 
1176 static __inline__ __m128i __DEFAULT_FN_ATTRS
1177 _mm_set1_epi8(char __b)
1178 {
1179  return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
1180 }
1181 
1182 static __inline__ __m128i __DEFAULT_FN_ATTRS
1183 _mm_setr_epi64(__m64 q0, __m64 q1)
1184 {
1185  return (__m128i){ (long long)q0, (long long)q1 };
1186 }
1187 
1188 static __inline__ __m128i __DEFAULT_FN_ATTRS
1189 _mm_setr_epi32(int i0, int i1, int i2, int i3)
1190 {
1191  return (__m128i)(__v4si){ i0, i1, i2, i3};
1192 }
1193 
1194 static __inline__ __m128i __DEFAULT_FN_ATTRS
1195 _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
1196 {
1197  return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1198 }
1199 
1200 static __inline__ __m128i __DEFAULT_FN_ATTRS
1201 _mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
1202 {
1203  return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1204 }
1205 
1206 static __inline__ __m128i __DEFAULT_FN_ATTRS
1207 _mm_setzero_si128(void)
1208 {
1209  return (__m128i){ 0LL, 0LL };
1210 }
1211 
1212 static __inline__ void __DEFAULT_FN_ATTRS
1213 _mm_store_si128(__m128i *__p, __m128i __b)
1214 {
1215  *__p = __b;
1216 }
1217 
1218 static __inline__ void __DEFAULT_FN_ATTRS
1219 _mm_storeu_si128(__m128i *__p, __m128i __b)
1220 {
1221  __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
1222 }
1223 
1224 static __inline__ void __DEFAULT_FN_ATTRS
1225 _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
1226 {
1227  __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
1228 }
1229 
1230 static __inline__ void __DEFAULT_FN_ATTRS
1231 _mm_storel_epi64(__m128i *__p, __m128i __a)
1232 {
1233  struct __mm_storel_epi64_struct {
1234  long long __u;
1235  } __attribute__((__packed__, __may_alias__));
1236  ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
1237 }
1238 
1239 static __inline__ void __DEFAULT_FN_ATTRS
1240 _mm_stream_pd(double *__p, __m128d __a)
1241 {
1242  __builtin_ia32_movntpd(__p, __a);
1243 }
1244 
1245 static __inline__ void __DEFAULT_FN_ATTRS
1246 _mm_stream_si128(__m128i *__p, __m128i __a)
1247 {
1248  __builtin_ia32_movntdq(__p, __a);
1249 }
1250 
1251 static __inline__ void __DEFAULT_FN_ATTRS
1252 _mm_stream_si32(int *__p, int __a)
1253 {
1254  __builtin_ia32_movnti(__p, __a);
1255 }
1256 
1257 #ifdef __x86_64__
1258 static __inline__ void __DEFAULT_FN_ATTRS
1259 _mm_stream_si64(long long *__p, long long __a)
1260 {
1261  __builtin_ia32_movnti64(__p, __a);
1262 }
1263 #endif
1264 
1265 static __inline__ void __DEFAULT_FN_ATTRS
1266 _mm_clflush(void const *__p)
1267 {
1268  __builtin_ia32_clflush(__p);
1269 }
1270 
1271 static __inline__ void __DEFAULT_FN_ATTRS
1272 _mm_lfence(void)
1273 {
1274  __builtin_ia32_lfence();
1275 }
1276 
1277 static __inline__ void __DEFAULT_FN_ATTRS
1278 _mm_mfence(void)
1279 {
1280  __builtin_ia32_mfence();
1281 }
1282 
1283 static __inline__ __m128i __DEFAULT_FN_ATTRS
1284 _mm_packs_epi16(__m128i __a, __m128i __b)
1285 {
1286  return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
1287 }
1288 
1289 static __inline__ __m128i __DEFAULT_FN_ATTRS
1290 _mm_packs_epi32(__m128i __a, __m128i __b)
1291 {
1292  return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
1293 }
1294 
1295 static __inline__ __m128i __DEFAULT_FN_ATTRS
1296 _mm_packus_epi16(__m128i __a, __m128i __b)
1297 {
1298  return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
1299 }
1300 
1301 static __inline__ int __DEFAULT_FN_ATTRS
1302 _mm_extract_epi16(__m128i __a, int __imm)
1303 {
1304  __v8hi __b = (__v8hi)__a;
1305  return (unsigned short)__b[__imm & 7];
1306 }
1307 
1308 static __inline__ __m128i __DEFAULT_FN_ATTRS
1309 _mm_insert_epi16(__m128i __a, int __b, int __imm)
1310 {
1311  __v8hi __c = (__v8hi)__a;
1312  __c[__imm & 7] = __b;
1313  return (__m128i)__c;
1314 }
1315 
1316 static __inline__ int __DEFAULT_FN_ATTRS
1317 _mm_movemask_epi8(__m128i __a)
1318 {
1319  return __builtin_ia32_pmovmskb128((__v16qi)__a);
1320 }
1321 
/* Permutes the four elements of `a` (viewed as __v4si).  Result element k
 * is the source element named by bits [2k+1:2k] of `imm`.  The second
 * shuffle operand is an all-zero vector that is never selected, because
 * every index is in 0..3.  `imm` is expanded four times and must be an
 * integer constant expression. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
                                   (__v4si)_mm_set1_epi32(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3); })
1327 
/* Permutes elements 0..3 of `a` (viewed as __v8hi) and copies elements
 * 4..7 unchanged.  Result element k (k = 0..3) is the source element
 * named by bits [2k+1:2k] of `imm`.  The all-zero second operand is never
 * selected.  `imm` is expanded four times and must be an integer constant
 * expression. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
                                   (__v8hi)_mm_set1_epi16(0), \
                                   ((imm) >> 0) & 0x3, \
                                   ((imm) >> 2) & 0x3, \
                                   ((imm) >> 4) & 0x3, \
                                   ((imm) >> 6) & 0x3, \
                                   4, 5, 6, 7); })
1334 
/* Copies elements 0..3 of `a` (viewed as __v8hi) unchanged and permutes
 * elements 4..7.  Result element 4+k (k = 0..3) is source element
 * 4 + (bits [2k+1:2k] of `imm`).  The all-zero second operand is never
 * selected.  `imm` is expanded four times and must be an integer constant
 * expression. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
                                   (__v8hi)_mm_set1_epi16(0), \
                                   0, 1, 2, 3, \
                                   4 + (((imm) >> 0) & 0x3), \
                                   4 + (((imm) >> 2) & 0x3), \
                                   4 + (((imm) >> 4) & 0x3), \
                                   4 + (((imm) >> 6) & 0x3)); })
1343 
1344 static __inline__ __m128i __DEFAULT_FN_ATTRS
1345 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
1346 {
1347  return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
1348 }
1349 
1350 static __inline__ __m128i __DEFAULT_FN_ATTRS
1351 _mm_unpackhi_epi16(__m128i __a, __m128i __b)
1352 {
1353  return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
1354 }
1355 
1356 static __inline__ __m128i __DEFAULT_FN_ATTRS
1357 _mm_unpackhi_epi32(__m128i __a, __m128i __b)
1358 {
1359  return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
1360 }
1361 
1362 static __inline__ __m128i __DEFAULT_FN_ATTRS
1363 _mm_unpackhi_epi64(__m128i __a, __m128i __b)
1364 {
1365  return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
1366 }
1367 
1368 static __inline__ __m128i __DEFAULT_FN_ATTRS
1369 _mm_unpacklo_epi8(__m128i __a, __m128i __b)
1370 {
1371  return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
1372 }
1373 
1374 static __inline__ __m128i __DEFAULT_FN_ATTRS
1375 _mm_unpacklo_epi16(__m128i __a, __m128i __b)
1376 {
1377  return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
1378 }
1379 
1380 static __inline__ __m128i __DEFAULT_FN_ATTRS
1381 _mm_unpacklo_epi32(__m128i __a, __m128i __b)
1382 {
1383  return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
1384 }
1385 
1386 static __inline__ __m128i __DEFAULT_FN_ATTRS
1387 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
1388 {
1389  return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
1390 }
1391 
1392 static __inline__ __m64 __DEFAULT_FN_ATTRS
1393 _mm_movepi64_pi64(__m128i __a)
1394 {
1395  return (__m64)__a[0];
1396 }
1397 
1398 static __inline__ __m128i __DEFAULT_FN_ATTRS
1399 _mm_movpi64_epi64(__m64 __a)
1400 {
1401  return (__m128i){ (long long)__a, 0 };
1402 }
1403 
1404 static __inline__ __m128i __DEFAULT_FN_ATTRS
1405 _mm_move_epi64(__m128i __a)
1406 {
1407  return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
1408 }
1409 
1410 static __inline__ __m128d __DEFAULT_FN_ATTRS
1411 _mm_unpackhi_pd(__m128d __a, __m128d __b)
1412 {
1413  return __builtin_shufflevector(__a, __b, 1, 2+1);
1414 }
1415 
1416 static __inline__ __m128d __DEFAULT_FN_ATTRS
1417 _mm_unpacklo_pd(__m128d __a, __m128d __b)
1418 {
1419  return __builtin_shufflevector(__a, __b, 0, 2+0);
1420 }
1421 
1422 static __inline__ int __DEFAULT_FN_ATTRS
1423 _mm_movemask_pd(__m128d __a)
1424 {
1425  return __builtin_ia32_movmskpd(__a);
1426 }
1427 
/* Builds a two-element result: element 0 is a[bit 0 of i] and element 1
 * is b[bit 1 of i] (shuffle indices 2..3 refer to elements of `b`).
 * `i` is expanded twice and must be an integer constant expression. */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  __builtin_shufflevector((__m128d)(a), (__m128d)(b), \
                          ((i) >> 0) & 1, \
                          (((i) >> 1) & 1) + 2); })
1431 
1432 static __inline__ __m128 __DEFAULT_FN_ATTRS
1433 _mm_castpd_ps(__m128d __a)
1434 {
1435  return (__m128)__a;
1436 }
1437 
1438 static __inline__ __m128i __DEFAULT_FN_ATTRS
1439 _mm_castpd_si128(__m128d __a)
1440 {
1441  return (__m128i)__a;
1442 }
1443 
1444 static __inline__ __m128d __DEFAULT_FN_ATTRS
1445 _mm_castps_pd(__m128 __a)
1446 {
1447  return (__m128d)__a;
1448 }
1449 
1450 static __inline__ __m128i __DEFAULT_FN_ATTRS
1451 _mm_castps_si128(__m128 __a)
1452 {
1453  return (__m128i)__a;
1454 }
1455 
1456 static __inline__ __m128 __DEFAULT_FN_ATTRS
1457 _mm_castsi128_ps(__m128i __a)
1458 {
1459  return (__m128)__a;
1460 }
1461 
1462 static __inline__ __m128d __DEFAULT_FN_ATTRS
1463 _mm_castsi128_pd(__m128i __a)
1464 {
1465  return (__m128d)__a;
1466 }
1467 
1468 static __inline__ void __DEFAULT_FN_ATTRS
1469 _mm_pause(void)
1470 {
1471  __asm__ volatile ("pause");
1472 }
1473 
1474 #undef __DEFAULT_FN_ATTRS
1475 
/* Packs two selector values into one immediate: x goes in bit 1 and y in
 * bit 0 (neither argument is masked). */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
1477 
1478 #endif /* __SSE2__ */
1479 
1480 #endif /* __EMMINTRIN_H */
char __v64qi __attribute__((__vector_size__(64)))
static vector float vector float __b
Definition: altivec.h:387
#define __DEFAULT_FN_ATTRS
Definition: adxintrin.h:32
static void int int __c
Definition: altivec.h:1911