/* Header prelude: two diagnostics and the attribute shorthand.
 * The conditionals that guard the #error directives are not part of this
 * excerpt, so the exact conditions under which they fire cannot be read
 * from here. */
25 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
32 # error "XOP instruction set is not enabled"
/* Attribute list (__always_inline__, __nodebug__); presumably attached to
 * every intrinsic below on declaration lines not shown in this excerpt. */
38 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
/* Multiply-accumulate wrappers taking 16-bit-lane sources (__v8hi).
 * Each one reinterprets its three __m128i operands as the vector types its
 * builtin takes, forwards them unchanged, and casts the result back to
 * __m128i.  Only the parameter list and return statement of each definition
 * are present in this excerpt.
 * NOTE(review): the extra "s" in a name/builtin presumably selects the
 * saturating form -- confirm against the XOP instruction reference. */
/* vpmacssww: __A, __B and __C are all passed as __v8hi. */
41 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
43 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
/* vpmacsww: same operand types as above. */
47 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
49 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
/* vpmacsswd: __A and __B as __v8hi, __C as __v4si. */
53 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
55 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
/* vpmacswd: __A and __B as __v8hi, __C as __v4si. */
59 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
61 return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
/* Multiply-accumulate wrappers taking 32-bit-lane sources (__v4si).
 * Same shape as the 16-bit family: cast the three __m128i operands, call the
 * builtin, cast the result back to __m128i.  Only the parameter list and
 * return statement of each definition are present in this excerpt.
 * NOTE(review): "lo"/"hi" (builtin suffix l/h) presumably pick which source
 * lanes are multiplied -- confirm against the XOP instruction reference. */
/* vpmacssdd: all three operands as __v4si. */
65 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
67 return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
/* vpmacsdd: all three operands as __v4si. */
71 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
73 return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
/* vpmacssdql: __A and __B as __v4si, __C as __v2di. */
77 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
79 return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
/* vpmacsdql: __A and __B as __v4si, __C as __v2di. */
83 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
85 return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
/* vpmacssdqh: __A and __B as __v4si, __C as __v2di. */
89 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
91 return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
/* vpmacsdqh: __A and __B as __v4si, __C as __v2di. */
95 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
97 return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
/* Multiply-add-accumulate wrappers: __A and __B are passed as __v8hi and
 * __C as __v4si; the builtin result is cast back to __m128i.  Only the
 * parameter list and return statement of each definition are present in
 * this excerpt. */
/* vpmadcsswd. */
101 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
103 return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
/* vpmadcswd. */
107 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
109 return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
/* Horizontal-add wrappers, epi* (vphadd{bw,bd,bq,wd,wq,dq}) family.
 * Each takes a single __m128i, reinterprets it as the source vector type of
 * its builtin and returns the builtin result as __m128i.  Only the parameter
 * list and return statement of each definition are present in this excerpt.
 * NOTE(review): the two letters after "vphadd" presumably name the source and
 * result element widths (b/w/d/q) -- confirm against the XOP reference. */
/* Source viewed as __v16qi. */
113 _mm_haddw_epi8(__m128i __A)
115 return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
119 _mm_haddd_epi8(__m128i __A)
121 return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
125 _mm_haddq_epi8(__m128i __A)
127 return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
/* Source viewed as __v8hi. */
131 _mm_haddd_epi16(__m128i __A)
133 return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
137 _mm_haddq_epi16(__m128i __A)
139 return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
/* Source viewed as __v4si. */
143 _mm_haddq_epi32(__m128i __A)
145 return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
/* Horizontal-add wrappers, epu* (vphaddu*) family.
 * Identical in shape to the epi* family and using the same cast types
 * (__v16qi, __v8hi, __v4si); only the builtin differs (the "u" variants).
 * Only the parameter list and return statement of each definition are
 * present in this excerpt. */
/* Source viewed as __v16qi. */
149 _mm_haddw_epu8(__m128i __A)
151 return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
155 _mm_haddd_epu8(__m128i __A)
157 return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
161 _mm_haddq_epu8(__m128i __A)
163 return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
/* Source viewed as __v8hi. */
167 _mm_haddd_epu16(__m128i __A)
169 return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
173 _mm_haddq_epu16(__m128i __A)
175 return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
/* Source viewed as __v4si. */
179 _mm_haddq_epu32(__m128i __A)
181 return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
/* Horizontal-subtract wrappers (vphsub{bw,wd,dq}).  Each takes one __m128i,
 * views it as the builtin's source vector type and returns the result as
 * __m128i.  Only the parameter list and return statement of each definition
 * are present in this excerpt. */
/* Source viewed as __v16qi. */
185 _mm_hsubw_epi8(__m128i __A)
187 return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
/* Source viewed as __v8hi. */
191 _mm_hsubd_epi16(__m128i __A)
193 return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
/* Source viewed as __v4si. */
197 _mm_hsubq_epi32(__m128i __A)
199 return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
/* vpcmov wrappers.  Unlike the other intrinsics in this header, the operands
 * are handed to the builtin without any vector cast; only the result is cast
 * to the return type.  Only the parameter list and return statement of each
 * definition are present in this excerpt.
 * NOTE(review): presumably a bitwise select of __A/__B under mask __C --
 * confirm operand roles against the XOP instruction reference. */
/* 128-bit form. */
203 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
205 return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
/* 256-bit form. */
209 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
211 return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
/* vpperm wrapper: all three __m128i operands are viewed as __v16qi, passed
 * to the builtin in the order given, and the result is cast back to __m128i.
 * Only the parameter list and return statement are present in this excerpt.
 * NOTE(review): __C is presumably the per-byte selector -- confirm. */
215 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
217 return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
/* Variable-count vprot{b,w,d,q} wrappers.  Both operands are viewed as the
 * same vector type (one per element width), passed to the builtin, and the
 * result is cast back to __m128i.  Only the parameter list and return
 * statement of each definition are present in this excerpt.
 * NOTE(review): __B presumably carries a per-element rotate count --
 * confirm against the XOP instruction reference. */
/* __v16qi operands. */
221 _mm_rot_epi8(__m128i __A, __m128i __B)
223 return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
/* __v8hi operands. */
227 _mm_rot_epi16(__m128i __A, __m128i __B)
229 return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
/* __v4si operands. */
233 _mm_rot_epi32(__m128i __A, __m128i __B)
235 return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
/* __v2di operands. */
239 _mm_rot_epi64(__m128i __A, __m128i __B)
241 return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
/* Immediate-count vprot{b,w,d,q}i macros.
 *   A - vector operand; converted to __m128i, then viewed as the builtin's
 *       source vector type.  Appears once in the expansion, so it is
 *       evaluated exactly once.
 *   N - count, forwarded unchanged.  NOTE(review): presumably must be an
 *       integer constant expression because it feeds an immediate-operand
 *       builtin -- confirm.
 * Yields the builtin result as __m128i.
 * The operand is taken from the macro parameter itself, so the expansion
 * does not depend on any identifier that happens to be in scope at the
 * point of use. */
244 #define _mm_roti_epi8(A, N) __extension__ ({ \
246 (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)); })
248 #define _mm_roti_epi16(A, N) __extension__ ({ \
250 (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)); })
252 #define _mm_roti_epi32(A, N) __extension__ ({ \
254 (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)); })
256 #define _mm_roti_epi64(A, N) __extension__ ({ \
258 (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)); })
/* vpshl{b,w,d,q} wrappers.  Both operands are viewed as the same vector
 * type, passed to the builtin, and the result is cast back to __m128i.
 * Only the parameter list and return statement of each definition are
 * present in this excerpt.
 * NOTE(review): presumably the logical variable shift, with per-element
 * counts in __B -- confirm against the XOP instruction reference. */
/* __v16qi operands. */
261 _mm_shl_epi8(__m128i __A, __m128i __B)
263 return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
/* __v8hi operands. */
267 _mm_shl_epi16(__m128i __A, __m128i __B)
269 return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
/* __v4si operands. */
273 _mm_shl_epi32(__m128i __A, __m128i __B)
275 return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
/* __v2di operands. */
279 _mm_shl_epi64(__m128i __A, __m128i __B)
281 return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
/* vpsha{b,w,d,q} wrappers.  Same shape as the _mm_shl_* family: both
 * operands viewed as one vector type, result cast back to __m128i.  Only
 * the parameter list and return statement of each definition are present
 * in this excerpt.
 * NOTE(review): presumably the arithmetic variable shift -- confirm against
 * the XOP instruction reference. */
/* __v16qi operands. */
285 _mm_sha_epi8(__m128i __A, __m128i __B)
287 return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
/* __v8hi operands. */
291 _mm_sha_epi16(__m128i __A, __m128i __B)
293 return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
/* __v4si operands. */
297 _mm_sha_epi32(__m128i __A, __m128i __B)
299 return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
/* __v2di operands. */
303 _mm_sha_epi64(__m128i __A, __m128i __B)
305 return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
/* Generic vpcomu{b,w,d,q} comparison macros (epu* family).
 *   A, B - vector operands; each is converted to __m128i, then viewed as the
 *          builtin's source vector type.  Each appears once in the
 *          expansion, so each is evaluated exactly once.
 *   N    - comparison selector, forwarded unchanged; this file passes the
 *          _MM_PCOMCTRL_* constants here.
 * Yields the builtin result as __m128i.
 * The operands are taken from the macro parameters themselves.  That keeps
 * the expansion independent of identifiers in the caller's scope, which
 * matters because the shorthand wrappers in this file invoke these macros
 * with arguments that are themselves named __A and __B. */
308 #define _mm_com_epu8(A, B, N) __extension__ ({ \
311 (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (N)); })
313 #define _mm_com_epu16(A, B, N) __extension__ ({ \
316 (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (N)); })
318 #define _mm_com_epu32(A, B, N) __extension__ ({ \
321 (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (N)); })
323 #define _mm_com_epu64(A, B, N) __extension__ ({ \
326 (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (N)); })
/* Generic vpcom{b,w,d,q} comparison macros (epi* family).
 *   A, B - vector operands; each is converted to __m128i, then viewed as the
 *          builtin's source vector type.  Each appears once in the
 *          expansion, so each is evaluated exactly once.
 *   N    - comparison selector, forwarded unchanged; this file passes the
 *          _MM_PCOMCTRL_* constants here.
 * Yields the builtin result as __m128i.
 * The operands are taken from the macro parameters themselves.  That keeps
 * the expansion independent of identifiers in the caller's scope, which
 * matters because the shorthand wrappers in this file invoke these macros
 * with arguments that are themselves named __A and __B. */
328 #define _mm_com_epi8(A, B, N) __extension__ ({ \
331 (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (N)); })
333 #define _mm_com_epi16(A, B, N) __extension__ ({ \
336 (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (N)); })
338 #define _mm_com_epi32(A, B, N) __extension__ ({ \
341 (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (N)); })
343 #define _mm_com_epi64(A, B, N) __extension__ ({ \
346 (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (N)); })
/* Selector values for the third argument (N) of the _mm_com_ep{i,u}* macros.
 * The _mm_com<cond>_* shorthands in this file pass exactly one of these. */
348 #define _MM_PCOMCTRL_LT 0
349 #define _MM_PCOMCTRL_LE 1
350 #define _MM_PCOMCTRL_GT 2
351 #define _MM_PCOMCTRL_GE 3
352 #define _MM_PCOMCTRL_EQ 4
353 #define _MM_PCOMCTRL_NEQ 5
354 #define _MM_PCOMCTRL_FALSE 6
355 #define _MM_PCOMCTRL_TRUE 7
/* epu8 comparison shorthands.  Each forwards __A and __B to _mm_com_epu8
 * with the _MM_PCOMCTRL_* selector matching its own name (lt, le, gt, ge,
 * eq, neq, false, true) and returns that result.  Only the parameter list
 * and return statement of each definition are present in this excerpt. */
358 _mm_comlt_epu8(__m128i __A, __m128i __B)
360 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
364 _mm_comle_epu8(__m128i __A, __m128i __B)
366 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
370 _mm_comgt_epu8(__m128i __A, __m128i __B)
372 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
376 _mm_comge_epu8(__m128i __A, __m128i __B)
378 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
382 _mm_comeq_epu8(__m128i __A, __m128i __B)
384 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
388 _mm_comneq_epu8(__m128i __A, __m128i __B)
390 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
394 _mm_comfalse_epu8(__m128i __A, __m128i __B)
396 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
400 _mm_comtrue_epu8(__m128i __A, __m128i __B)
402 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
/* epu16 comparison shorthands.  Each forwards __A and __B to _mm_com_epu16
 * with the _MM_PCOMCTRL_* selector matching its own name (lt, le, gt, ge,
 * eq, neq, false, true) and returns that result.  Only the parameter list
 * and return statement of each definition are present in this excerpt. */
406 _mm_comlt_epu16(__m128i __A, __m128i __B)
408 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
412 _mm_comle_epu16(__m128i __A, __m128i __B)
414 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
418 _mm_comgt_epu16(__m128i __A, __m128i __B)
420 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
424 _mm_comge_epu16(__m128i __A, __m128i __B)
426 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
430 _mm_comeq_epu16(__m128i __A, __m128i __B)
432 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
436 _mm_comneq_epu16(__m128i __A, __m128i __B)
438 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
442 _mm_comfalse_epu16(__m128i __A, __m128i __B)
444 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
448 _mm_comtrue_epu16(__m128i __A, __m128i __B)
450 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
/* epu32 comparison shorthands.  Each forwards __A and __B to _mm_com_epu32
 * with the _MM_PCOMCTRL_* selector matching its own name (lt, le, gt, ge,
 * eq, neq, false, true) and returns that result.  Only the parameter list
 * and return statement of each definition are present in this excerpt. */
454 _mm_comlt_epu32(__m128i __A, __m128i __B)
456 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
460 _mm_comle_epu32(__m128i __A, __m128i __B)
462 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
466 _mm_comgt_epu32(__m128i __A, __m128i __B)
468 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
472 _mm_comge_epu32(__m128i __A, __m128i __B)
474 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
478 _mm_comeq_epu32(__m128i __A, __m128i __B)
480 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
484 _mm_comneq_epu32(__m128i __A, __m128i __B)
486 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
490 _mm_comfalse_epu32(__m128i __A, __m128i __B)
492 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
496 _mm_comtrue_epu32(__m128i __A, __m128i __B)
498 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
/* epu64 comparison shorthands.  Each forwards __A and __B to _mm_com_epu64
 * with the _MM_PCOMCTRL_* selector matching its own name (lt, le, gt, ge,
 * eq, neq, false, true) and returns that result.  Only the parameter list
 * and return statement of each definition are present in this excerpt. */
502 _mm_comlt_epu64(__m128i __A, __m128i __B)
504 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
508 _mm_comle_epu64(__m128i __A, __m128i __B)
510 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
514 _mm_comgt_epu64(__m128i __A, __m128i __B)
516 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
520 _mm_comge_epu64(__m128i __A, __m128i __B)
522 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
526 _mm_comeq_epu64(__m128i __A, __m128i __B)
528 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
532 _mm_comneq_epu64(__m128i __A, __m128i __B)
534 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
538 _mm_comfalse_epu64(__m128i __A, __m128i __B)
540 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
544 _mm_comtrue_epu64(__m128i __A, __m128i __B)
546 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
/* epi8 comparison shorthands.  Each forwards __A and __B to _mm_com_epi8
 * with the _MM_PCOMCTRL_* selector matching its own name (lt, le, gt, ge,
 * eq, neq, false, true) and returns that result.  Only the parameter list
 * and return statement of each definition are present in this excerpt. */
550 _mm_comlt_epi8(__m128i __A, __m128i __B)
552 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
556 _mm_comle_epi8(__m128i __A, __m128i __B)
558 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
562 _mm_comgt_epi8(__m128i __A, __m128i __B)
564 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
568 _mm_comge_epi8(__m128i __A, __m128i __B)
570 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
574 _mm_comeq_epi8(__m128i __A, __m128i __B)
576 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
580 _mm_comneq_epi8(__m128i __A, __m128i __B)
582 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
586 _mm_comfalse_epi8(__m128i __A, __m128i __B)
588 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
592 _mm_comtrue_epi8(__m128i __A, __m128i __B)
594 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
/* epi16 comparison shorthands.  Each forwards __A and __B to _mm_com_epi16
 * with the _MM_PCOMCTRL_* selector matching its own name (lt, le, gt, ge,
 * eq, neq, false, true) and returns that result.  Only the parameter list
 * and return statement of each definition are present in this excerpt. */
598 _mm_comlt_epi16(__m128i __A, __m128i __B)
600 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
604 _mm_comle_epi16(__m128i __A, __m128i __B)
606 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
610 _mm_comgt_epi16(__m128i __A, __m128i __B)
612 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
616 _mm_comge_epi16(__m128i __A, __m128i __B)
618 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
622 _mm_comeq_epi16(__m128i __A, __m128i __B)
624 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
628 _mm_comneq_epi16(__m128i __A, __m128i __B)
630 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
634 _mm_comfalse_epi16(__m128i __A, __m128i __B)
636 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
640 _mm_comtrue_epi16(__m128i __A, __m128i __B)
642 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
/* epi32 comparison shorthands.  Each forwards __A and __B to _mm_com_epi32
 * with the _MM_PCOMCTRL_* selector matching its own name (lt, le, gt, ge,
 * eq, neq, false, true) and returns that result.  Only the parameter list
 * and return statement of each definition are present in this excerpt. */
646 _mm_comlt_epi32(__m128i __A, __m128i __B)
648 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
652 _mm_comle_epi32(__m128i __A, __m128i __B)
654 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
658 _mm_comgt_epi32(__m128i __A, __m128i __B)
660 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
664 _mm_comge_epi32(__m128i __A, __m128i __B)
666 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
670 _mm_comeq_epi32(__m128i __A, __m128i __B)
672 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
676 _mm_comneq_epi32(__m128i __A, __m128i __B)
678 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
682 _mm_comfalse_epi32(__m128i __A, __m128i __B)
684 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
688 _mm_comtrue_epi32(__m128i __A, __m128i __B)
690 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
/* epi64 comparison shorthands.  Each forwards __A and __B to _mm_com_epi64
 * with the _MM_PCOMCTRL_* selector matching its own name (lt, le, gt, ge,
 * eq, neq, false, true) and returns that result.  Only the parameter list
 * and return statement of each definition are present in this excerpt. */
694 _mm_comlt_epi64(__m128i __A, __m128i __B)
696 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
700 _mm_comle_epi64(__m128i __A, __m128i __B)
702 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
706 _mm_comgt_epi64(__m128i __A, __m128i __B)
708 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
712 _mm_comge_epi64(__m128i __A, __m128i __B)
714 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
718 _mm_comeq_epi64(__m128i __A, __m128i __B)
720 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
724 _mm_comneq_epi64(__m128i __A, __m128i __B)
726 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
730 _mm_comfalse_epi64(__m128i __A, __m128i __B)
732 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
736 _mm_comtrue_epi64(__m128i __A, __m128i __B)
738 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
/* vpermil2{pd,ps} macros, 128-bit and 256-bit forms.
 *   X, Y - floating-point vector operands; each is converted to the macro's
 *          own vector type (__m128d, __m256d, __m128 or __m256), then viewed
 *          as the builtin's source vector type.
 *   C    - integer vector operand; converted to __m128i / __m256i, then
 *          viewed as the builtin's integer vector type.
 *          NOTE(review): presumably the per-element selector -- confirm.
 *   I    - forwarded unchanged.  NOTE(review): presumably must be an integer
 *          constant expression (immediate operand) -- confirm.
 * X, Y and C each appear once in the expansion, so each is evaluated exactly
 * once.  The result is cast to the macro's vector type.
 * The operands are taken from the macro parameters themselves, so the
 * expansion does not depend on identifiers in scope at the point of use. */
741 #define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
745 (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
746 (__v2di)(__m128i)(C), (I)); })
748 #define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
752 (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
753 (__v4di)(__m256i)(C), (I)); })
755 #define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
759 (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
760 (__v4si)(__m128i)(C), (I)); })
762 #define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
766 (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
767 (__v8si)(__m256i)(C), (I)); })
/* vfrcz{ss,sd,ps,pd} wrappers, 128-bit and 256-bit.  Each takes a single
 * floating-point vector, views it as the builtin's vector type and returns
 * the builtin result cast to the argument's own type.  Only the parameter
 * list and return statement of each definition are present in this excerpt.
 * NOTE(review): presumably extracts the fractional part of each element
 * (scalar forms: of the low element only) -- confirm against the XOP
 * instruction reference. */
/* __m128 viewed as __v4sf, scalar-single builtin. */
770 _mm_frcz_ss(__m128 __A)
772 return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
/* __m128d viewed as __v2df, scalar-double builtin. */
776 _mm_frcz_sd(__m128d __A)
778 return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
/* __m128 viewed as __v4sf, packed-single builtin. */
782 _mm_frcz_ps(__m128 __A)
784 return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
/* __m128d viewed as __v2df, packed-double builtin. */
788 _mm_frcz_pd(__m128d __A)
790 return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
/* __m256 viewed as __v8sf, 256-bit packed-single builtin. */
794 _mm256_frcz_ps(__m256 __A)
796 return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
/* __m256d viewed as __v4df, 256-bit packed-double builtin. */
800 _mm256_frcz_pd(__m256d __A)
802 return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
/* Drop the __DEFAULT_FN_ATTRS definition made near the top of this header. */
805 #undef __DEFAULT_FN_ATTRS
/* __DEFAULT_FN_ATTRS is deliberately left undefined from this point on.
 * The #undef directly above retires this header's private attribute macro;
 * defining it again, even as empty, would expose it to every includer and
 * conflict with any later header that supplies its own, different
 * definition of the same name. */