Using the new clang 3.3 headers

This commit is contained in:
Strahinja Val Markovic 2013-06-19 21:58:53 -07:00
parent 3d02f0aba3
commit 280c704814
24 changed files with 5582 additions and 10363 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -29,39 +29,39 @@
#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) #define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_abs_epi8(__m256i a) _mm256_abs_epi8(__m256i __a)
{ {
return (__m256i)__builtin_ia32_pabsb256((__v32qi)a); return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_abs_epi16(__m256i a) _mm256_abs_epi16(__m256i __a)
{ {
return (__m256i)__builtin_ia32_pabsw256((__v16hi)a); return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_abs_epi32(__m256i a) _mm256_abs_epi32(__m256i __a)
{ {
return (__m256i)__builtin_ia32_pabsd256((__v8si)a); return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_packs_epi16(__m256i a, __m256i b) _mm256_packs_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_packsswb256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_packs_epi32(__m256i a, __m256i b) _mm256_packs_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_packssdw256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_packus_epi16(__m256i a, __m256i b) _mm256_packus_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_packuswb256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
@@ -71,51 +71,51 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_add_epi8(__m256i a, __m256i b) _mm256_add_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v32qi)a + (__v32qi)b); return (__m256i)((__v32qi)__a + (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_add_epi16(__m256i a, __m256i b) _mm256_add_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v16hi)a + (__v16hi)b); return (__m256i)((__v16hi)__a + (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_add_epi32(__m256i a, __m256i b) _mm256_add_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v8si)a + (__v8si)b); return (__m256i)((__v8si)__a + (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_add_epi64(__m256i a, __m256i b) _mm256_add_epi64(__m256i __a, __m256i __b)
{ {
return a + b; return __a + __b;
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_adds_epi8(__m256i a, __m256i b) _mm256_adds_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_paddsb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_adds_epi16(__m256i a, __m256i b) _mm256_adds_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_paddsw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_adds_epu8(__m256i a, __m256i b) _mm256_adds_epu8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_paddusb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_adds_epu16(__m256i a, __m256i b) _mm256_adds_epu16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_paddusw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
} }
#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \ #define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
@@ -124,27 +124,27 @@ _mm256_adds_epu16(__m256i a, __m256i b)
(__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); }) (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_and_si256(__m256i a, __m256i b) _mm256_and_si256(__m256i __a, __m256i __b)
{ {
return a & b; return __a & __b;
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_andnot_si256(__m256i a, __m256i b) _mm256_andnot_si256(__m256i __a, __m256i __b)
{ {
return ~a & b; return ~__a & __b;
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_avg_epu8(__m256i a, __m256i b) _mm256_avg_epu8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pavgb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_avg_epu16(__m256i a, __m256i b) _mm256_avg_epu16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pavgw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
@@ -160,177 +160,177 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
(__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); }) (__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); })
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmpeq_epi8(__m256i a, __m256i b) _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v32qi)a == (__v32qi)b); return (__m256i)((__v32qi)__a == (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmpeq_epi16(__m256i a, __m256i b) _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v16hi)a == (__v16hi)b); return (__m256i)((__v16hi)__a == (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmpeq_epi32(__m256i a, __m256i b) _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v8si)a == (__v8si)b); return (__m256i)((__v8si)__a == (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmpeq_epi64(__m256i a, __m256i b) _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
{ {
return (__m256i)(a == b); return (__m256i)(__a == __b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmpgt_epi8(__m256i a, __m256i b) _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v32qi)a > (__v32qi)b); return (__m256i)((__v32qi)__a > (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmpgt_epi16(__m256i a, __m256i b) _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v16hi)a > (__v16hi)b); return (__m256i)((__v16hi)__a > (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmpgt_epi32(__m256i a, __m256i b) _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v8si)a > (__v8si)b); return (__m256i)((__v8si)__a > (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmpgt_epi64(__m256i a, __m256i b) _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
{ {
return (__m256i)(a > b); return (__m256i)(__a > __b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_hadd_epi16(__m256i a, __m256i b) _mm256_hadd_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_phaddw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_hadd_epi32(__m256i a, __m256i b) _mm256_hadd_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_phaddd256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_hadds_epi16(__m256i a, __m256i b) _mm256_hadds_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_phaddsw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_hsub_epi16(__m256i a, __m256i b) _mm256_hsub_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_phsubw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_hsub_epi32(__m256i a, __m256i b) _mm256_hsub_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_phsubd256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_hsubs_epi16(__m256i a, __m256i b) _mm256_hsubs_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_maddubs_epi16(__m256i a, __m256i b) _mm256_maddubs_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_madd_epi16(__m256i a, __m256i b) _mm256_madd_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_max_epi8(__m256i a, __m256i b) _mm256_max_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_max_epi16(__m256i a, __m256i b) _mm256_max_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_max_epi32(__m256i a, __m256i b) _mm256_max_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmaxsd256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_max_epu8(__m256i a, __m256i b) _mm256_max_epu8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmaxub256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_max_epu16(__m256i a, __m256i b) _mm256_max_epu16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_max_epu32(__m256i a, __m256i b) _mm256_max_epu32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmaxud256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_min_epi8(__m256i a, __m256i b) _mm256_min_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pminsb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_min_epi16(__m256i a, __m256i b) _mm256_min_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pminsw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_min_epi32(__m256i a, __m256i b) _mm256_min_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pminsd256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_min_epu8(__m256i a, __m256i b) _mm256_min_epu8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pminub256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_min_epu16(__m256i a, __m256i b) _mm256_min_epu16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_min_epu32(__m256i a, __m256i b) _mm256_min_epu32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pminud256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
} }
static __inline__ int __attribute__((__always_inline__, __nodebug__)) static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm256_movemask_epi8(__m256i a) _mm256_movemask_epi8(__m256i __a)
{ {
return __builtin_ia32_pmovmskb256((__v32qi)a); return __builtin_ia32_pmovmskb256((__v32qi)__a);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
@@ -406,63 +406,63 @@ _mm256_cvtepu32_epi64(__m128i __V)
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_mul_epi32(__m256i a, __m256i b) _mm256_mul_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_mulhrs_epi16(__m256i a, __m256i b) _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_mulhi_epu16(__m256i a, __m256i b) _mm256_mulhi_epu16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_mulhi_epi16(__m256i a, __m256i b) _mm256_mulhi_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_mullo_epi16(__m256i a, __m256i b) _mm256_mullo_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v16hi)a * (__v16hi)b); return (__m256i)((__v16hi)__a * (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_mullo_epi32 (__m256i a, __m256i b) _mm256_mullo_epi32 (__m256i __a, __m256i __b)
{ {
return (__m256i)((__v8si)a * (__v8si)b); return (__m256i)((__v8si)__a * (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_mul_epu32(__m256i a, __m256i b) _mm256_mul_epu32(__m256i __a, __m256i __b)
{ {
return __builtin_ia32_pmuludq256((__v8si)a, (__v8si)b); return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_or_si256(__m256i a, __m256i b) _mm256_or_si256(__m256i __a, __m256i __b)
{ {
return a | b; return __a | __b;
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sad_epu8(__m256i a, __m256i b) _mm256_sad_epu8(__m256i __a, __m256i __b)
{ {
return __builtin_ia32_psadbw256((__v32qi)a, (__v32qi)b); return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_shuffle_epi8(__m256i a, __m256i b) _mm256_shuffle_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_pshufb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
} }
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ #define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
@@ -502,21 +502,21 @@ _mm256_shuffle_epi8(__m256i a, __m256i b)
12, 13, 14, 15); }) 12, 13, 14, 15); })
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sign_epi8(__m256i a, __m256i b) _mm256_sign_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_psignb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sign_epi16(__m256i a, __m256i b) _mm256_sign_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_psignw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sign_epi32(__m256i a, __m256i b) _mm256_sign_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_psignd256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
} }
#define _mm256_slli_si256(a, count) __extension__ ({ \ #define _mm256_slli_si256(a, count) __extension__ ({ \
@@ -524,63 +524,63 @@ _mm256_sign_epi32(__m256i a, __m256i b)
(__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); }) (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); })
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_slli_epi16(__m256i a, int count) _mm256_slli_epi16(__m256i __a, int __count)
{ {
return (__m256i)__builtin_ia32_psllwi256((__v16hi)a, count); return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sll_epi16(__m256i a, __m128i count) _mm256_sll_epi16(__m256i __a, __m128i __count)
{ {
return (__m256i)__builtin_ia32_psllw256((__v16hi)a, (__v8hi)count); return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_slli_epi32(__m256i a, int count) _mm256_slli_epi32(__m256i __a, int __count)
{ {
return (__m256i)__builtin_ia32_pslldi256((__v8si)a, count); return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sll_epi32(__m256i a, __m128i count) _mm256_sll_epi32(__m256i __a, __m128i __count)
{ {
return (__m256i)__builtin_ia32_pslld256((__v8si)a, (__v4si)count); return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_slli_epi64(__m256i a, int count) _mm256_slli_epi64(__m256i __a, int __count)
{ {
return __builtin_ia32_psllqi256(a, count); return __builtin_ia32_psllqi256(__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sll_epi64(__m256i a, __m128i count) _mm256_sll_epi64(__m256i __a, __m128i __count)
{ {
return __builtin_ia32_psllq256(a, count); return __builtin_ia32_psllq256(__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_srai_epi16(__m256i a, int count) _mm256_srai_epi16(__m256i __a, int __count)
{ {
return (__m256i)__builtin_ia32_psrawi256((__v16hi)a, count); return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sra_epi16(__m256i a, __m128i count) _mm256_sra_epi16(__m256i __a, __m128i __count)
{ {
return (__m256i)__builtin_ia32_psraw256((__v16hi)a, (__v8hi)count); return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_srai_epi32(__m256i a, int count) _mm256_srai_epi32(__m256i __a, int __count)
{ {
return (__m256i)__builtin_ia32_psradi256((__v8si)a, count); return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sra_epi32(__m256i a, __m128i count) _mm256_sra_epi32(__m256i __a, __m128i __count)
{ {
return (__m256i)__builtin_ia32_psrad256((__v8si)a, (__v4si)count); return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
} }
#define _mm256_srli_si256(a, count) __extension__ ({ \ #define _mm256_srli_si256(a, count) __extension__ ({ \
@@ -588,141 +588,141 @@ _mm256_sra_epi32(__m256i a, __m128i count)
(__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); }) (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); })
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_srli_epi16(__m256i a, int count) _mm256_srli_epi16(__m256i __a, int __count)
{ {
return (__m256i)__builtin_ia32_psrlwi256((__v16hi)a, count); return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_srl_epi16(__m256i a, __m128i count) _mm256_srl_epi16(__m256i __a, __m128i __count)
{ {
return (__m256i)__builtin_ia32_psrlw256((__v16hi)a, (__v8hi)count); return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_srli_epi32(__m256i a, int count) _mm256_srli_epi32(__m256i __a, int __count)
{ {
return (__m256i)__builtin_ia32_psrldi256((__v8si)a, count); return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_srl_epi32(__m256i a, __m128i count) _mm256_srl_epi32(__m256i __a, __m128i __count)
{ {
return (__m256i)__builtin_ia32_psrld256((__v8si)a, (__v4si)count); return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_srli_epi64(__m256i a, int count) _mm256_srli_epi64(__m256i __a, int __count)
{ {
return __builtin_ia32_psrlqi256(a, count); return __builtin_ia32_psrlqi256(__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_srl_epi64(__m256i a, __m128i count) _mm256_srl_epi64(__m256i __a, __m128i __count)
{ {
return __builtin_ia32_psrlq256(a, count); return __builtin_ia32_psrlq256(__a, __count);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sub_epi8(__m256i a, __m256i b) _mm256_sub_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v32qi)a - (__v32qi)b); return (__m256i)((__v32qi)__a - (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sub_epi16(__m256i a, __m256i b) _mm256_sub_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v16hi)a - (__v16hi)b); return (__m256i)((__v16hi)__a - (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sub_epi32(__m256i a, __m256i b) _mm256_sub_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)((__v8si)a - (__v8si)b); return (__m256i)((__v8si)__a - (__v8si)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_sub_epi64(__m256i a, __m256i b) _mm256_sub_epi64(__m256i __a, __m256i __b)
{ {
return a - b; return __a - __b;
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_subs_epi8(__m256i a, __m256i b) _mm256_subs_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_psubsb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_subs_epi16(__m256i a, __m256i b) _mm256_subs_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_psubsw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_subs_epu8(__m256i a, __m256i b) _mm256_subs_epu8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_psubusb256((__v32qi)a, (__v32qi)b); return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_subs_epu16(__m256i a, __m256i b) _mm256_subs_epu16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_psubusw256((__v16hi)a, (__v16hi)b); return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_unpackhi_epi8(__m256i a, __m256i b) _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_unpackhi_epi16(__m256i a, __m256i b) _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_unpackhi_epi32(__m256i a, __m256i b) _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_unpackhi_epi64(__m256i a, __m256i b) _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_shufflevector(a, b, 1, 4+1, 3, 4+3); return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_unpacklo_epi8(__m256i a, __m256i b) _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_unpacklo_epi16(__m256i a, __m256i b) _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_unpacklo_epi32(__m256i a, __m256i b) _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_unpacklo_epi64(__m256i a, __m256i b) _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_shufflevector(a, b, 0, 4+0, 2, 4+2); return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2);
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_xor_si256(__m256i a, __m256i b) _mm256_xor_si256(__m256i __a, __m256i __b)
{ {
return a ^ b; return __a ^ __b;
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
@ -750,9 +750,9 @@ _mm256_broadcastsd_pd(__m128d __X)
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm_broadcastsi128_si256(__m128i const *a) _mm_broadcastsi128_si256(__m128i const *__a)
{ {
return (__m256i)__builtin_ia32_vbroadcastsi256(a); return (__m256i)__builtin_ia32_vbroadcastsi256(__a);
} }
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ #define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
@ -815,9 +815,9 @@ _mm_broadcastq_epi64(__m128i __X)
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_permutevar8x32_epi32(__m256i a, __m256i b) _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
{ {
return (__m256i)__builtin_ia32_permvarsi256((__v8si)a, (__v8si)b); return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
} }
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \ #define _mm256_permute4x64_pd(V, M) __extension__ ({ \
@ -827,9 +827,9 @@ _mm256_permutevar8x32_epi32(__m256i a, __m256i b)
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_permutevar8x32_ps(__m256 a, __m256 b) _mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
{ {
return (__m256)__builtin_ia32_permvarsf256((__v8sf)a, (__v8sf)b); return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b);
} }
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ #define _mm256_permute4x64_epi64(V, M) __extension__ ({ \

File diff suppressed because it is too large Load Diff

View File

@ -25,9 +25,10 @@
#error this header is for x86 only #error this header is for x86 only
#endif #endif
/* Executes CPUID for leaf __level and stores the resulting EAX, EBX, ECX and
 * EDX values through the four output pointers.
 *
 * Returns 1 on success.  Returns 0, leaving the outputs untouched, when
 * __level is above the highest leaf the processor reports for its range
 * (the basic range, or the extended range when bit 31 of __level is set).
 * This is the return contract GCC's <cpuid.h> gives for __get_cpuid.
 *
 * Only EAX is loaded before the instruction runs; ECX is not set, so leaves
 * that are indexed by a sub-leaf in ECX cannot be selected through this
 * function.
 */
static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
                                 unsigned int *__ebx, unsigned int *__ecx,
                                 unsigned int *__edx) {
    unsigned int __max_leaf, __skip_b, __skip_c, __skip_d;

    /* Leaf 0 (basic) or 0x80000000 (extended) reports the highest leaf of
       its range in EAX; the other three registers are not needed here. */
    __asm("cpuid" : "=a"(__max_leaf), "=b"(__skip_b), "=c"(__skip_c),
                    "=d"(__skip_d)
                  : "0"(__level & 0x80000000u));
    if (__max_leaf == 0 || __max_leaf < __level)
        return 0;

    __asm("cpuid" : "=a"(*__eax), "=b" (*__ebx), "=c"(*__ecx), "=d"(*__edx)
                  : "0"(__level));
    return 1;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/*===---- f16cintrin.h - F16C intrinsics ---------------------------------=== /*===---- f16cintrin.h - F16C intrinsics ---------------------------------===
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights * in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
@ -44,15 +44,15 @@ typedef float __m256 __attribute__ ((__vector_size__ (32)));
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); }) (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
static __inline __m128 __attribute__((__always_inline__, __nodebug__)) static __inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cvtph_ps(__m128i a) _mm_cvtph_ps(__m128i __a)
{ {
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)a); return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
} }
static __inline __m256 __attribute__((__always_inline__, __nodebug__)) static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_cvtph_ps(__m128i a) _mm256_cvtph_ps(__m128i __a)
{ {
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)a); return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
} }
#endif /* __F16CINTRIN_H */ #endif /* __F16CINTRIN_H */

View File

@ -102,4 +102,13 @@ _rdrand64_step(unsigned long long *__p)
#include <rtmintrin.h> #include <rtmintrin.h>
#endif #endif
/* FIXME: check __HLE__ as well when HLE is supported. */
#if defined (__RTM__)
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_xtest(void)
{
  /* Returns whatever the xtest builtin reports, unmodified. */
  int __status = __builtin_ia32_xtest();
  return __status;
}
#endif
#endif /* __IMMINTRIN_H */ #endif /* __IMMINTRIN_H */

View File

@ -25,6 +25,7 @@
#define _MM3DNOW_H_INCLUDED #define _MM3DNOW_H_INCLUDED
#include <mmintrin.h> #include <mmintrin.h>
#include <prfchwintrin.h>
typedef float __v2sf __attribute__((__vector_size__(8))); typedef float __v2sf __attribute__((__vector_size__(8)));

View File

@ -30,45 +30,45 @@
#include <malloc.h> #include <malloc.h>
#else #else
#ifndef __cplusplus #ifndef __cplusplus
extern int posix_memalign(void **memptr, size_t alignment, size_t size); extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
#else #else
// Some systems (e.g. those with GNU libc) declare posix_memalign with an // Some systems (e.g. those with GNU libc) declare posix_memalign with an
// exception specifier. Via an "egregious workaround" in // exception specifier. Via an "egregious workaround" in
// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid // Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid
// redeclaration of glibc's declaration. // redeclaration of glibc's declaration.
extern "C" int posix_memalign(void **memptr, size_t alignment, size_t size); extern "C" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
#endif #endif
#endif #endif
#if !(defined(_WIN32) && defined(_mm_malloc)) #if !(defined(_WIN32) && defined(_mm_malloc))
/* Allocates __size bytes aligned to __align bytes and returns the pointer,
 * or 0 when the allocation fails.  Memory obtained here must be released
 * with _mm_free(), which calls the release function matching the allocator
 * chosen below.
 */
static __inline__ void *__attribute__((__always_inline__, __nodebug__,
                                       __malloc__))
_mm_malloc(size_t __size, size_t __align)
{
#if !defined(__MINGW32__) && !defined(_WIN32)
  /* free() releases both malloc() and posix_memalign() memory, so plain
     malloc() is a safe shortcut when no alignment is requested.  The Windows
     aligned allocators need their own release function, so on those targets
     every request goes through them to keep _mm_free() correct. */
  if (__align == 1) {
    return malloc(__size);
  }
#endif

  /* Raise power-of-two alignments below pointer size up to pointer size. */
  if (!(__align & (__align - 1)) && __align < sizeof(void *))
    __align = sizeof(void *);

  void *__mallocedMemory;
#if defined(__MINGW32__)
  __mallocedMemory = __mingw_aligned_malloc(__size, __align);
#elif defined(_WIN32)
  __mallocedMemory = _aligned_malloc(__size, __align);
#else
  /* posix_memalign() returns non-zero on failure. */
  if (posix_memalign(&__mallocedMemory, __align, __size))
    return 0;
#endif

  return __mallocedMemory;
}

/* Releases memory obtained from _mm_malloc().  The release function mirrors
 * the allocator selected in _mm_malloc(): blocks from the Windows aligned
 * allocators must not be handed to free().
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_free(void *__p)
{
#if defined(__MINGW32__)
  __mingw_aligned_free(__p);
#elif defined(_WIN32)
  _aligned_free(__p);
#else
  free(__p);
#endif
}
#endif #endif

View File

@ -17,6 +17,7 @@ module _Builtin_intrinsics [system] {
} }
explicit module cpuid { explicit module cpuid {
requires x86
header "cpuid.h" header "cpuid.h"
} }
@ -33,7 +34,6 @@ module _Builtin_intrinsics [system] {
explicit module sse { explicit module sse {
requires sse requires sse
export mmx export mmx
export * // note: for hackish <emmintrin.h> dependency
header "xmmintrin.h" header "xmmintrin.h"
} }

View File

@ -31,65 +31,65 @@
#include <emmintrin.h> #include <emmintrin.h>
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_lddqu_si128(__m128i const *p) _mm_lddqu_si128(__m128i const *__p)
{ {
return (__m128i)__builtin_ia32_lddqu((char const *)p); return (__m128i)__builtin_ia32_lddqu((char const *)__p);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_addsub_ps(__m128 a, __m128 b) _mm_addsub_ps(__m128 __a, __m128 __b)
{ {
return __builtin_ia32_addsubps(a, b); return __builtin_ia32_addsubps(__a, __b);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_ps(__m128 a, __m128 b) _mm_hadd_ps(__m128 __a, __m128 __b)
{ {
return __builtin_ia32_haddps(a, b); return __builtin_ia32_haddps(__a, __b);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_ps(__m128 a, __m128 b) _mm_hsub_ps(__m128 __a, __m128 __b)
{ {
return __builtin_ia32_hsubps(a, b); return __builtin_ia32_hsubps(__a, __b);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_movehdup_ps(__m128 a) _mm_movehdup_ps(__m128 __a)
{ {
return __builtin_shufflevector(a, a, 1, 1, 3, 3); return __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_moveldup_ps(__m128 a) _mm_moveldup_ps(__m128 __a)
{ {
return __builtin_shufflevector(a, a, 0, 0, 2, 2); return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
} }
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_addsub_pd(__m128d a, __m128d b) _mm_addsub_pd(__m128d __a, __m128d __b)
{ {
return __builtin_ia32_addsubpd(a, b); return __builtin_ia32_addsubpd(__a, __b);
} }
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pd(__m128d a, __m128d b) _mm_hadd_pd(__m128d __a, __m128d __b)
{ {
return __builtin_ia32_haddpd(a, b); return __builtin_ia32_haddpd(__a, __b);
} }
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pd(__m128d a, __m128d b) _mm_hsub_pd(__m128d __a, __m128d __b)
{ {
return __builtin_ia32_hsubpd(a, b); return __builtin_ia32_hsubpd(__a, __b);
} }
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp) #define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_movedup_pd(__m128d a) _mm_movedup_pd(__m128d __a)
{ {
return __builtin_shufflevector(a, a, 0, 0); return __builtin_shufflevector(__a, __a, 0, 0);
} }
#define _MM_DENORMALS_ZERO_ON (0x0040) #define _MM_DENORMALS_ZERO_ON (0x0040)
@ -101,15 +101,15 @@ _mm_movedup_pd(__m128d a)
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
{
  /* The monitor builtin takes a non-const pointer, hence the cast. */
  void *__addr = (void *)__p;
  __builtin_ia32_monitor(__addr, __extensions, __hints);
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mwait(unsigned __extensions, unsigned __hints)
{
  /* Both arguments go unchanged, in the same order, to the mwait builtin. */
  __builtin_ia32_mwait(
      __extensions,
      __hints);
}
#endif /* __SSE3__ */ #endif /* __SSE3__ */

View File

@ -0,0 +1,34 @@
/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
#endif
#if defined(__PRFCHW__) || defined(__3dNOW__)
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_m_prefetchw(void *__P)
{
  /* Second argument 1 asks for a prefetch in anticipation of a write;
     third argument 3 is the locality hint (the value of _MM_HINT_T0). */
  __builtin_prefetch(__P,
                     1,
                     3 /* _MM_HINT_T0 */);
}
#endif

View File

@ -0,0 +1,48 @@
/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __X86INTRIN_H
#error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifdef __RDSEED__
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_rdseed16_step(unsigned short *__p)
{
  /* Returns the rdseed16_step builtin's result; __p is passed through. */
  int __status = __builtin_ia32_rdseed16_step(__p);
  return __status;
}
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_rdseed32_step(unsigned int *__p)
{
  /* Returns the rdseed32_step builtin's result; __p is passed through. */
  int __status = __builtin_ia32_rdseed32_step(__p);
  return __status;
}
#ifdef __x86_64__
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_rdseed64_step(unsigned long long *__p)
{
  /* Returns the rdseed64_step builtin's result; __p is passed through. */
  int __status = __builtin_ia32_rdseed64_step(__p);
  return __status;
}
#endif
#endif /* __RDSEED__ */

View File

@ -195,10 +195,10 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/* SSE4 Insertion and Extraction from XMM Register Instructions. */ /* SSE4 Insertion and Extraction from XMM Register Instructions. */
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) #define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
/* Yields element N of X (viewed as __v4sf) as an int holding the float's
   object representation; the union carries the value from float to int. */
#define _mm_extract_ps(X, N) (__extension__                      \
                              ({ union { int __i; float __f; } __t;  \
                                 __v4sf __a = (__v4sf)(X);       \
                                 __t.__f = __a[N];                 \
                                 __t.__i;}))
/* Miscellaneous insert and extract macros. */ /* Miscellaneous insert and extract macros. */
/* Extract a single-precision float from X at index N into D. */ /* Extract a single-precision float from X at index N into D. */

View File

@ -24,7 +24,12 @@
#ifndef __STDALIGN_H #ifndef __STDALIGN_H
#define __STDALIGN_H #define __STDALIGN_H
#ifndef __cplusplus
#define alignas _Alignas #define alignas _Alignas
#define alignof _Alignof
#endif
#define __alignas_is_defined 1 #define __alignas_is_defined 1
#define __alignof_is_defined 1
#endif /* __STDALIGN_H */ #endif /* __STDALIGN_H */

View File

@ -26,17 +26,42 @@
#ifndef __STDDEF_H #ifndef __STDDEF_H
#define __STDDEF_H #define __STDDEF_H
#ifndef _PTRDIFF_T #if !defined(_PTRDIFF_T) || __has_feature(modules)
/* Always define ptrdiff_t when modules are available. */
#if !__has_feature(modules)
#define _PTRDIFF_T #define _PTRDIFF_T
typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t;
#endif #endif
#ifndef _SIZE_T typedef __PTRDIFF_TYPE__ ptrdiff_t;
#endif
#if !defined(_SIZE_T) || __has_feature(modules)
/* Always define size_t when modules are available. */
#if !__has_feature(modules)
#define _SIZE_T #define _SIZE_T
typedef __typeof__(sizeof(int)) size_t;
#endif #endif
typedef __SIZE_TYPE__ size_t;
#endif
/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
* enabled. */
#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \
!defined(_RSIZE_T)) || __has_feature(modules)
/* Always define rsize_t when modules are available. */
#if !__has_feature(modules)
#define _RSIZE_T
#endif
typedef __SIZE_TYPE__ rsize_t;
#endif
#ifndef __cplusplus #ifndef __cplusplus
#ifndef _WCHAR_T /* Always define wchar_t when modules are available. */
#if !defined(_WCHAR_T) || __has_feature(modules)
#if !__has_feature(modules)
#define _WCHAR_T #define _WCHAR_T
#if defined(_MSC_EXTENSIONS)
#define _WCHAR_T_DEFINED
#endif
#endif
typedef __WCHAR_TYPE__ wchar_t; typedef __WCHAR_TYPE__ wchar_t;
#endif #endif
#endif #endif
@ -66,9 +91,12 @@ using ::std::nullptr_t;
/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use /* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ __WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
#if defined(__need_wint_t) #if defined(__need_wint_t)
#if !defined(_WINT_T) /* Always define wint_t when modules are available. */
#if !defined(_WINT_T) || __has_feature(modules)
#if !__has_feature(modules)
#define _WINT_T #define _WINT_T
#endif
typedef __WINT_TYPE__ wint_t; typedef __WINT_TYPE__ wint_t;
#endif /* _WINT_T */ #endif
#undef __need_wint_t #undef __need_wint_t
#endif /* __need_wint_t */ #endif /* __need_wint_t */

View File

@ -30,7 +30,48 @@
*/ */
#if __STDC_HOSTED__ && \ #if __STDC_HOSTED__ && \
defined(__has_include_next) && __has_include_next(<stdint.h>) defined(__has_include_next) && __has_include_next(<stdint.h>)
// C99 7.18.3 Limits of other integer types
//
// Footnote 219, 220: C++ implementations should define these macros only when
// __STDC_LIMIT_MACROS is defined before <stdint.h> is included.
//
// Footnote 222: C++ implementations should define these macros only when
// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included.
//
// C++11 [cstdint.syn]p2:
//
// The macros defined by <cstdint> are provided unconditionally. In particular,
// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in
// footnotes 219, 220, and 222 in the C standard) play no role in C++.
//
// C11 removed the problematic footnotes.
//
// Work around this inconsistency by always defining those macros in C++ mode,
// so that a C library implementation which follows the C99 standard can be
// used in C++.
# ifdef __cplusplus
# if !defined(__STDC_LIMIT_MACROS)
# define __STDC_LIMIT_MACROS
# define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
# endif
# if !defined(__STDC_CONSTANT_MACROS)
# define __STDC_CONSTANT_MACROS
# define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
# endif
# endif
# include_next <stdint.h> # include_next <stdint.h>
# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
# undef __STDC_LIMIT_MACROS
# undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
# endif
# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
# undef __STDC_CONSTANT_MACROS
# undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
# endif
#else #else
/* C99 7.18.1.1 Exact-width integer types. /* C99 7.18.1.1 Exact-width integer types.
@ -626,6 +667,12 @@ typedef __UINTMAX_TYPE__ uintmax_t;
#define PTRDIFF_MAX __INTN_MAX(__PTRDIFF_WIDTH__) #define PTRDIFF_MAX __INTN_MAX(__PTRDIFF_WIDTH__)
#define SIZE_MAX __UINTN_MAX(__SIZE_WIDTH__) #define SIZE_MAX __UINTN_MAX(__SIZE_WIDTH__)
/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__
* is enabled. */
#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
#define RSIZE_MAX (SIZE_MAX >> 1)
#endif
/* C99 7.18.2.5 Limits of greatest-width integer types. */ /* C99 7.18.2.5 Limits of greatest-width integer types. */
#define INTMAX_MIN __INTN_MIN(__INTMAX_WIDTH__) #define INTMAX_MIN __INTN_MIN(__INTMAX_WIDTH__)
#define INTMAX_MAX __INTN_MAX(__INTMAX_WIDTH__) #define INTMAX_MAX __INTN_MAX(__INTMAX_WIDTH__)

View File

@ -0,0 +1,30 @@
/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __STDNORETURN_H
#define __STDNORETURN_H
#define noreturn _Noreturn
#define __noreturn_is_defined 1
#endif /* __STDNORETURN_H */

View File

@ -31,39 +31,39 @@
#include <pmmintrin.h> #include <pmmintrin.h>
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi8(__m64 a) _mm_abs_pi8(__m64 __a)
{ {
return (__m64)__builtin_ia32_pabsb((__v8qi)a); return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi8(__m128i a) _mm_abs_epi8(__m128i __a)
{ {
return (__m128i)__builtin_ia32_pabsb128((__v16qi)a); return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi16(__m64 a) _mm_abs_pi16(__m64 __a)
{ {
return (__m64)__builtin_ia32_pabsw((__v4hi)a); return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi16(__m128i a) _mm_abs_epi16(__m128i __a)
{ {
return (__m128i)__builtin_ia32_pabsw128((__v8hi)a); return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi32(__m64 a) _mm_abs_pi32(__m64 __a)
{ {
return (__m64)__builtin_ia32_pabsd((__v2si)a); return (__m64)__builtin_ia32_pabsd((__v2si)__a);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi32(__m128i a) _mm_abs_epi32(__m128i __a)
{ {
return (__m128i)__builtin_ia32_pabsd128((__v4si)a); return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
} }
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \ #define _mm_alignr_epi8(a, b, n) __extension__ ({ \
@ -77,147 +77,147 @@ _mm_abs_epi32(__m128i a)
(__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); }) (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadd_epi16(__m128i a, __m128i b) _mm_hadd_epi16(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b); return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadd_epi32(__m128i a, __m128i b) _mm_hadd_epi32(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b); return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi16(__m64 a, __m64 b) _mm_hadd_pi16(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b); return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi32(__m64 a, __m64 b) _mm_hadd_pi32(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b); return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadds_epi16(__m128i a, __m128i b) _mm_hadds_epi16(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b); return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadds_pi16(__m64 a, __m64 b) _mm_hadds_pi16(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b); return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsub_epi16(__m128i a, __m128i b) _mm_hsub_epi16(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b); return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsub_epi32(__m128i a, __m128i b) _mm_hsub_epi32(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b); return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi16(__m64 a, __m64 b) _mm_hsub_pi16(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b); return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi32(__m64 a, __m64 b) _mm_hsub_pi32(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b); return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_epi16(__m128i a, __m128i b) _mm_hsubs_epi16(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b); return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_pi16(__m64 a, __m64 b) _mm_hsubs_pi16(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b); return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_epi16(__m128i a, __m128i b) _mm_maddubs_epi16(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b); return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_pi16(__m64 a, __m64 b) _mm_maddubs_pi16(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b); return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_epi16(__m128i a, __m128i b) _mm_mulhrs_epi16(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b); return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_pi16(__m64 a, __m64 b) _mm_mulhrs_pi16(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b); return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_epi8(__m128i a, __m128i b) _mm_shuffle_epi8(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b); return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_pi8(__m64 a, __m64 b) _mm_shuffle_pi8(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b); return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi8(__m128i a, __m128i b) _mm_sign_epi8(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b); return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi16(__m128i a, __m128i b) _mm_sign_epi16(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b); return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi32(__m128i a, __m128i b) _mm_sign_epi32(__m128i __a, __m128i __b)
{ {
return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b); return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi8(__m64 a, __m64 b) _mm_sign_pi8(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b); return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi16(__m64 a, __m64 b) _mm_sign_pi16(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b); return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
} }
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi32(__m64 a, __m64 b) _mm_sign_pi32(__m64 __a, __m64 __b)
{ {
return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b); return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
} }
#endif /* __SSSE3__ */ #endif /* __SSSE3__ */

View File

@ -23,6 +23,9 @@
/* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/ /* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/
#ifndef __CLANG_UNWIND_H
#define __CLANG_UNWIND_H
#if __has_include_next(<unwind.h>) #if __has_include_next(<unwind.h>)
/* Darwin and libunwind provide an unwind.h. If that's available, use /* Darwin and libunwind provide an unwind.h. If that's available, use
* it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, * it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
@ -59,7 +62,9 @@ extern "C" {
/* It is a bit strange for a header to play with the visibility of the /* It is a bit strange for a header to play with the visibility of the
symbols it declares, but this matches gcc's behavior and some programs symbols it declares, but this matches gcc's behavior and some programs
depend on it */ depend on it */
#ifndef HIDE_EXPORTS
#pragma GCC visibility push(default) #pragma GCC visibility push(default)
#endif
struct _Unwind_Context; struct _Unwind_Context;
typedef enum { typedef enum {
@ -100,25 +105,29 @@ typedef enum {
_UVRSR_FAILED = 2 _UVRSR_FAILED = 2
} _Unwind_VRS_Result; } _Unwind_VRS_Result;
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *context, _Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass regclass, _Unwind_VRS_RegClass __regclass,
uint32_t regno, uint32_t __regno,
_Unwind_VRS_DataRepresentation representation, _Unwind_VRS_DataRepresentation __representation,
void *valuep); void *__valuep);
#else #else
uintptr_t _Unwind_GetIP(struct _Unwind_Context* context); uintptr_t _Unwind_GetIP(struct _Unwind_Context* __context);
#endif #endif
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context*, void*); typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context*, void*);
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void*); _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void*);
#ifndef HIDE_EXPORTS
#pragma GCC visibility pop #pragma GCC visibility pop
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif #endif
#endif /* __CLANG_UNWIND_H */

View File

@ -46,6 +46,14 @@
#include <popcntintrin.h> #include <popcntintrin.h>
#endif #endif
#ifdef __RDSEED__
#include <rdseedintrin.h>
#endif
#ifdef __PRFCHW__
#include <prfchwintrin.h>
#endif
#ifdef __SSE4A__ #ifdef __SSE4A__
#include <ammintrin.h> #include <ammintrin.h>
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/*===---- xopintrin.h - FMA4 intrinsics ------------------------------------=== /*===---- xopintrin.h - XOP intrinsics -------------------------------------===
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
@ -22,7 +22,7 @@
*/ */
#ifndef __X86INTRIN_H #ifndef __X86INTRIN_H
#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead." #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
#endif #endif
#ifndef __XOPINTRIN_H #ifndef __XOPINTRIN_H