/* 16-byte vector of `long long` elements; the type the integer code here
 * casts its results to and passes between operations. */
30 typedef long long __m128i
__attribute__((__vector_size__(16)));
/* 16-byte vector of `long long` elements, declared with the same element
 * type and size as __m128i. */
34 typedef long long __v2di
__attribute__ ((__vector_size__ (16)));
/* 16-byte vector of explicitly `signed char` elements. It is the operand
 * type of the byte-wise `>` comparison further down; presumably spelled
 * `signed` so that comparison does not depend on whether plain `char` is
 * signed (TODO confirm against __v16qi's declaration). */
40 typedef signed char __v16qs
__attribute__((__vector_size__(16)));
/* Attribute set applied to the intrinsic functions declared with this macro:
 * always inlined, no debug info, and compiled for the "sse2" target. */
45 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
102 __m128d
__c = __builtin_ia32_sqrtsd(__b);
103 return (__m128d) { __c[0], __a[1] };
109 return __builtin_ia32_sqrtpd(__a);
115 return __builtin_ia32_minsd(__a, __b);
121 return __builtin_ia32_minpd(__a, __b);
127 return __builtin_ia32_maxsd(__a, __b);
133 return __builtin_ia32_maxpd(__a, __b);
139 return (__m128d)((__v4si)__a & (__v4si)
__b);
145 return (__m128d)(~(__v4si)__a & (__v4si)
__b);
151 return (__m128d)((__v4si)__a | (__v4si)
__b);
157 return (__m128d)((__v4si)__a ^ (__v4si)
__b);
163 return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
169 return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
175 return (__m128d)__builtin_ia32_cmplepd(__a, __b);
181 return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
187 return (__m128d)__builtin_ia32_cmplepd(__b, __a);
193 return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
199 return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
205 return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
211 return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
217 return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
223 return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
229 return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
235 return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
241 return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
247 return (__m128d)__builtin_ia32_cmplesd(__a, __b);
253 __m128d
__c = __builtin_ia32_cmpltsd(__b, __a);
254 return (__m128d) { __c[0], __a[1] };
260 __m128d
__c = __builtin_ia32_cmplesd(__b, __a);
261 return (__m128d) { __c[0], __a[1] };
267 return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
273 return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
279 return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
285 return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
291 return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
297 __m128d
__c = __builtin_ia32_cmpnltsd(__b, __a);
298 return (__m128d) { __c[0], __a[1] };
304 __m128d
__c = __builtin_ia32_cmpnlesd(__b, __a);
305 return (__m128d) { __c[0], __a[1] };
311 return __builtin_ia32_comisdeq(__a, __b);
317 return __builtin_ia32_comisdlt(__a, __b);
323 return __builtin_ia32_comisdle(__a, __b);
329 return __builtin_ia32_comisdgt(__a, __b);
335 return __builtin_ia32_comisdge(__a, __b);
341 return __builtin_ia32_comisdneq(__a, __b);
347 return __builtin_ia32_ucomisdeq(__a, __b);
353 return __builtin_ia32_ucomisdlt(__a, __b);
359 return __builtin_ia32_ucomisdle(__a, __b);
365 return __builtin_ia32_ucomisdgt(__a, __b);
371 return __builtin_ia32_ucomisdge(__a, __b);
377 return __builtin_ia32_ucomisdneq(__a, __b);
383 return __builtin_ia32_cvtpd2ps(__a);
389 return __builtin_ia32_cvtps2pd(__a);
395 return __builtin_ia32_cvtdq2pd((__v4si)__a);
401 return __builtin_ia32_cvtpd2dq(__a);
407 return __builtin_ia32_cvtsd2si(__a);
434 return (__m128i)__builtin_ia32_cvttpd2dq(__a);
446 return (__m64)__builtin_ia32_cvtpd2pi(__a);
452 return (__m64)__builtin_ia32_cvttpd2pi(__a);
458 return __builtin_ia32_cvtpi2pd((__v2si)__a);
470 return *(__m128d*)__dp;
476 struct __mm_load1_pd_struct {
479 double __u = ((
struct __mm_load1_pd_struct*)__dp)->__u;
480 return (__m128d){ __u, __u };
/* Alternate spelling: forwards its argument unchanged to _mm_load1_pd. */
483 #define _mm_load_pd1(dp) _mm_load1_pd(dp)
488 __m128d __u = *(__m128d*)__dp;
489 return __builtin_shufflevector(__u, __u, 1, 0);
498 return ((
struct __loadu_pd*)__dp)->__v;
504 struct __mm_load_sd_struct {
507 double __u = ((
struct __mm_load_sd_struct*)__dp)->__u;
508 return (__m128d){ __u, 0 };
514 struct __mm_loadh_pd_struct {
517 double __u = ((
struct __mm_loadh_pd_struct*)__dp)->__u;
518 return (__m128d){ __a[0], __u };
524 struct __mm_loadl_pd_struct {
527 double __u = ((
struct __mm_loadl_pd_struct*)__dp)->__u;
528 return (__m128d){ __u, __a[1] };
534 return (__m128d)__builtin_ia32_undef128();
540 return (__m128d){ __w, 0 };
546 return (__m128d){ __w, __w };
552 return (__m128d){
__x, __w };
558 return (__m128d){ __w, __x };
564 return (__m128d){ 0, 0 };
570 return (__m128d){ __b[0], __a[1] };
576 struct __mm_store_sd_struct {
579 ((
struct __mm_store_sd_struct*)__dp)->__u = __a[0];
585 struct __mm_store1_pd_struct {
588 ((
struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
589 ((
struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
595 *(__m128d *)__dp = __a;
601 __builtin_ia32_storeupd(__dp, __a);
607 __a = __builtin_shufflevector(__a, __a, 1, 0);
608 *(__m128d *)__dp = __a;
614 struct __mm_storeh_pd_struct {
617 ((
struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
623 struct __mm_storeh_pd_struct {
626 ((
struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
632 return (__m128i)((__v16qi)__a + (__v16qi)
__b);
638 return (__m128i)((__v8hi)__a + (__v8hi)
__b);
644 return (__m128i)((__v4si)__a + (__v4si)
__b);
650 return (__m64)__builtin_ia32_paddq(__a, __b);
662 return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
668 return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
674 return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
680 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
686 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
692 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
698 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
704 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
710 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
716 return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
722 return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
728 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
734 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
740 return (__m128i)((__v8hi)__a * (__v8hi)
__b);
746 return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
752 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
758 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
764 return (__m128i)((__v16qi)__a - (__v16qi)
__b);
770 return (__m128i)((__v8hi)__a - (__v8hi)
__b);
776 return (__m128i)((__v4si)__a - (__v4si)
__b);
782 return (__m64)__builtin_ia32_psubq(__a, __b);
794 return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
800 return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
806 return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
812 return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
/*
 * Byte-wise shift of the 128-bit value `a` toward higher byte indices by
 * `imm` bytes, with zero fill.
 *
 * Implemented as a 16-byte shuffle whose first operand is the zero vector
 * and whose second operand is `a`. Result byte k uses index
 * (16 + k) - (imm & 0xF): an index >= 16 selects byte (k - imm) of `a`,
 * an index < 16 selects a byte of the zero vector. When (imm & 0xF0) is
 * non-zero every index is 0, so the whole result is zero.
 *
 * NOTE: `imm` is substituted into all sixteen index expressions, so pass a
 * constant expression without side effects.
 */
839 #define _mm_slli_si128(a, imm) __extension__ ({ \
840 (__m128i)__builtin_shufflevector((__v16qi)_mm_setzero_si128(), \
841 (__v16qi)(__m128i)(a), \
842 ((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \
843 ((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \
844 ((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \
845 ((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \
846 ((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \
847 ((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \
848 ((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \
849 ((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \
850 ((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \
851 ((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \
852 ((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \
853 ((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \
854 ((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \
855 ((imm)&0xF0) ? 0 : 29 - ((imm)&0xF), \
856 ((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \
857 ((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); })
/* Alternate spelling: expands to _mm_slli_si128 with the same arguments. */
859 #define _mm_bslli_si128(a, imm) \
860 _mm_slli_si128((a), (imm))
865 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
871 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
877 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
883 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
889 return __builtin_ia32_psllqi128(__a, __count);
895 return __builtin_ia32_psllq128(__a, __count);
901 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
907 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
913 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
919 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
/*
 * Byte-wise shift of the 128-bit value `a` toward lower byte indices by
 * `imm` bytes, with zero fill.
 *
 * Implemented as a 16-byte shuffle whose first operand is `a` and whose
 * second operand is the zero vector. Result byte k uses index
 * (imm & 0xF) + k: an index < 16 selects that byte of `a`, an index >= 16
 * selects a byte of the zero vector. When (imm & 0xF0) is non-zero every
 * index is 16 (the first zero byte), so the whole result is zero.
 *
 * NOTE: `imm` is substituted into all sixteen index expressions, so pass a
 * constant expression without side effects.
 */
922 #define _mm_srli_si128(a, imm) __extension__ ({ \
923 (__m128i)__builtin_shufflevector((__v16qi)(__m128i)(a), \
924 (__v16qi)_mm_setzero_si128(), \
925 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 0, \
926 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 1, \
927 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 2, \
928 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 3, \
929 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 4, \
930 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 5, \
931 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 6, \
932 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 7, \
933 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 8, \
934 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 9, \
935 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \
936 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \
937 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \
938 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \
939 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \
940 ((imm)&0xF0) ? 16 : ((imm)&0xF) + 15); })
/* Alternate spelling: expands to _mm_srli_si128 with the same arguments. */
942 #define _mm_bsrli_si128(a, imm) \
943 _mm_srli_si128((a), (imm))
948 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
954 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
960 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
966 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
972 return __builtin_ia32_psrlqi128(__a, __count);
978 return __builtin_ia32_psrlq128(__a, __count);
984 return (__m128i)((__v16qi)__a == (__v16qi)
__b);
990 return (__m128i)((__v8hi)__a == (__v8hi)
__b);
996 return (__m128i)((__v4si)__a == (__v4si)
__b);
1004 return (__m128i)((__v16qs)__a > (__v16qs)
__b);
1010 return (__m128i)((__v8hi)__a > (__v8hi)
__b);
1016 return (__m128i)((__v4si)__a > (__v4si)
__b);
1039 _mm_cvtsi64_sd(__m128d __a,
long long __b)
1046 _mm_cvtsd_si64(__m128d __a)
1048 return __builtin_ia32_cvtsd2si64(__a);
1052 _mm_cvttsd_si64(__m128d __a)
1061 return __builtin_ia32_cvtdq2ps((__v4si)__a);
1067 return (__m128i)__builtin_ia32_cvtps2dq(__a);
1073 return (__m128i)__builtin_ia32_cvttps2dq(__a);
1079 return (__m128i)(__v4si){ __a, 0, 0, 0 };
1084 _mm_cvtsi64_si128(
long long __a)
1086 return (__m128i){ __a, 0 };
1093 __v4si
__b = (__v4si)__a;
1099 _mm_cvtsi128_si64(__m128i __a)
1114 struct __loadu_si128 {
1117 return ((
struct __loadu_si128*)__p)->__v;
1123 struct __mm_loadl_epi64_struct {
1126 return (__m128i) { ((
struct __mm_loadl_epi64_struct*)__p)->__u, 0};
1132 return (__m128i)__builtin_ia32_undef128();
1138 return (__m128i){ __q0, __q1 };
1144 return (__m128i){ (
long long)__q0, (
long long)__q1 };
1150 return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
1154 _mm_set_epi16(
short __w7,
short __w6,
short __w5,
short __w4,
short __w3,
short __w2,
short __w1,
short __w0)
1156 return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
1160 _mm_set_epi8(
char __b15,
char __b14,
char __b13,
char __b12,
char __b11,
char __b10,
char __b9,
char __b8,
char __b7,
char __b6,
char __b5,
char __b4,
char __b3,
char __b2,
char __b1,
char __b0)
1162 return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
1168 return (__m128i){ __q, __q };
1174 return (__m128i){ (
long long)__q, (
long long)__q };
1180 return (__m128i)(__v4si){ __i, __i, __i, __i };
1186 return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
1192 return (__m128i)(__v16qi){
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b,
__b, __b };
1198 return (__m128i){ (
long long)__q0, (
long long)__q1 };
1204 return (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
1208 _mm_setr_epi16(
short __w0,
short __w1,
short __w2,
short __w3,
short __w4,
short __w5,
short __w6,
short __w7)
1210 return (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
1214 _mm_setr_epi8(
char __b0,
char __b1,
char __b2,
char __b3,
char __b4,
char __b5,
char __b6,
char __b7,
char __b8,
char __b9,
char __b10,
char __b11,
char __b12,
char __b13,
char __b14,
char __b15)
1216 return (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
1222 return (__m128i){ 0LL, 0LL };
1234 __builtin_ia32_storedqu((
char *)__p, (__v16qi)__b);
1240 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
1246 struct __mm_storel_epi64_struct {
1249 ((
struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
1255 __builtin_ia32_movntpd(__p, __a);
1261 __builtin_ia32_movntdq(__p, __a);
1267 __builtin_ia32_movnti(__p, __a);
1272 _mm_stream_si64(
long long *
__p,
long long __a)
1274 __builtin_ia32_movnti64(__p, __a);
1281 __builtin_ia32_clflush(__p);
1287 __builtin_ia32_lfence();
1293 __builtin_ia32_mfence();
1299 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
1305 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
1311 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
1317 __v8hi
__b = (__v8hi)__a;
1318 return (
unsigned short)__b[__imm & 7];
1324 __v8hi
__c = (__v8hi)__a;
1325 __c[__imm & 7] =
__b;
1326 return (__m128i)
__c;
1332 return __builtin_ia32_pmovmskb128((__v16qi)__a);
/*
 * Rearrange the four `int` lanes of `a` according to `imm`.
 *
 * `imm` is read as four 2-bit fields: result lane j takes lane
 * ((imm >> (2 * j)) & 3) of `a`. All indices are therefore in 0..3, so the
 * zero vector passed as the second shuffle operand is never selected.
 */
1335 #define _mm_shuffle_epi32(a, imm) __extension__ ({ \
1336 (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
1337 (__v4si)_mm_setzero_si128(), \
1338 (imm) & 0x3, ((imm) & 0xc) >> 2, \
1339 ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
1341 #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
1342 (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
1343 (__v8hi)_mm_setzero_si128(), \
1344 (imm) & 0x3, ((imm) & 0xc) >> 2, \
1345 ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
1348 #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
1349 (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
1350 (__v8hi)_mm_setzero_si128(), \
1352 4 + (((imm) & 0x03) >> 0), \
1353 4 + (((imm) & 0x0c) >> 2), \
1354 4 + (((imm) & 0x30) >> 4), \
1355 4 + (((imm) & 0xc0) >> 6)); })
1360 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
1366 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
1372 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
1378 return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
1384 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
1390 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
1396 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
1402 return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
1408 return (__m64)__a[0];
1414 return (__m128i){ (
long long)__a, 0 };
1420 return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
1426 return __builtin_shufflevector(__a, __b, 1, 2+1);
1432 return __builtin_shufflevector(__a, __b, 0, 2+0);
1438 return __builtin_ia32_movmskpd(__a);
/*
 * Build a two-element double vector from one element of `a` and one of `b`.
 *
 * Result element 0 is a[i & 1]. Result element 1 is b[(i >> 1) & 1]; the
 * "+ 2" offsets the index into the second shuffle operand.
 */
1441 #define _mm_shuffle_pd(a, b, i) __extension__ ({ \
1442 (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
1443 (i) & 1, (((i) & 2) >> 1) + 2); })
1454 return (__m128i)__a;
1460 return (__m128d)__a;
1466 return (__m128i)__a;
1478 return (__m128d)__a;
1484 __builtin_ia32_pause();
/* Drop the attribute shorthand so it does not stay defined past this point. */
1487 #undef __DEFAULT_FN_ATTRS
/* Pack two selectors into one value: `x` goes to bit 1 and `y` to bit 0.
 * This is the bit layout _mm_shuffle_pd reads, where bit 0 picks the element
 * taken from its first operand and bit 1 the element from its second. */
1489 #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w)
static __inline__ void __DEFAULT_FN_ATTRS _mm_clflush(void const *__p)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp)
static __inline__ int __DEFAULT_FN_ATTRS _mm_extract_epi16(__m128i __a, int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
double __m128d __attribute__((__vector_size__(16)))
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i const *__p)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i const *__p)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0)
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mfence(void)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_lfence(void)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
static vector float vector float __b
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b)
static __inline unsigned char unsigned int __x
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a)
static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i *__p, __m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_insert_epi16(__m128i __a, int __b, int __imm)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtpi32_pd(__m64 __a)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128()
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b)
#define __DEFAULT_FN_ATTRS
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i *__p, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si32(int *__p, int __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a)
static __inline__ void __DEFAULT_FN_ATTRS _mm_pause(void)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd()
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b)