Using the new clang 3.3 headers
This commit is contained in:
parent
3d02f0aba3
commit
280c704814
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -29,39 +29,39 @@
|
|||||||
#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
|
#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_abs_epi8(__m256i a)
|
_mm256_abs_epi8(__m256i __a)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pabsb256((__v32qi)a);
|
return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_abs_epi16(__m256i a)
|
_mm256_abs_epi16(__m256i __a)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pabsw256((__v16hi)a);
|
return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_abs_epi32(__m256i a)
|
_mm256_abs_epi32(__m256i __a)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pabsd256((__v8si)a);
|
return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_packs_epi16(__m256i a, __m256i b)
|
_mm256_packs_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_packsswb256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_packs_epi32(__m256i a, __m256i b)
|
_mm256_packs_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_packssdw256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_packus_epi16(__m256i a, __m256i b)
|
_mm256_packus_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_packuswb256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
@ -71,51 +71,51 @@ _mm256_packus_epi32(__m256i __V1, __m256i __V2)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_add_epi8(__m256i a, __m256i b)
|
_mm256_add_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v32qi)a + (__v32qi)b);
|
return (__m256i)((__v32qi)__a + (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_add_epi16(__m256i a, __m256i b)
|
_mm256_add_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v16hi)a + (__v16hi)b);
|
return (__m256i)((__v16hi)__a + (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_add_epi32(__m256i a, __m256i b)
|
_mm256_add_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v8si)a + (__v8si)b);
|
return (__m256i)((__v8si)__a + (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_add_epi64(__m256i a, __m256i b)
|
_mm256_add_epi64(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return a + b;
|
return __a + __b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_adds_epi8(__m256i a, __m256i b)
|
_mm256_adds_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_paddsb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_adds_epi16(__m256i a, __m256i b)
|
_mm256_adds_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_paddsw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_adds_epu8(__m256i a, __m256i b)
|
_mm256_adds_epu8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_paddusb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_adds_epu16(__m256i a, __m256i b)
|
_mm256_adds_epu16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_paddusw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
|
#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \
|
||||||
@ -124,27 +124,27 @@ _mm256_adds_epu16(__m256i a, __m256i b)
|
|||||||
(__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
|
(__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); })
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_and_si256(__m256i a, __m256i b)
|
_mm256_and_si256(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return a & b;
|
return __a & __b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_andnot_si256(__m256i a, __m256i b)
|
_mm256_andnot_si256(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return ~a & b;
|
return ~__a & __b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_avg_epu8(__m256i a, __m256i b)
|
_mm256_avg_epu8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pavgb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_avg_epu16(__m256i a, __m256i b)
|
_mm256_avg_epu16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pavgw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
@ -160,177 +160,177 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
|
|||||||
(__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); })
|
(__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); })
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cmpeq_epi8(__m256i a, __m256i b)
|
_mm256_cmpeq_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v32qi)a == (__v32qi)b);
|
return (__m256i)((__v32qi)__a == (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cmpeq_epi16(__m256i a, __m256i b)
|
_mm256_cmpeq_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v16hi)a == (__v16hi)b);
|
return (__m256i)((__v16hi)__a == (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cmpeq_epi32(__m256i a, __m256i b)
|
_mm256_cmpeq_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v8si)a == (__v8si)b);
|
return (__m256i)((__v8si)__a == (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cmpeq_epi64(__m256i a, __m256i b)
|
_mm256_cmpeq_epi64(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)(a == b);
|
return (__m256i)(__a == __b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cmpgt_epi8(__m256i a, __m256i b)
|
_mm256_cmpgt_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v32qi)a > (__v32qi)b);
|
return (__m256i)((__v32qi)__a > (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cmpgt_epi16(__m256i a, __m256i b)
|
_mm256_cmpgt_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v16hi)a > (__v16hi)b);
|
return (__m256i)((__v16hi)__a > (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cmpgt_epi32(__m256i a, __m256i b)
|
_mm256_cmpgt_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v8si)a > (__v8si)b);
|
return (__m256i)((__v8si)__a > (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cmpgt_epi64(__m256i a, __m256i b)
|
_mm256_cmpgt_epi64(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)(a > b);
|
return (__m256i)(__a > __b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_hadd_epi16(__m256i a, __m256i b)
|
_mm256_hadd_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_phaddw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_hadd_epi32(__m256i a, __m256i b)
|
_mm256_hadd_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_phaddd256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_hadds_epi16(__m256i a, __m256i b)
|
_mm256_hadds_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_phaddsw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_hsub_epi16(__m256i a, __m256i b)
|
_mm256_hsub_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_phsubw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_hsub_epi32(__m256i a, __m256i b)
|
_mm256_hsub_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_phsubd256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_hsubs_epi16(__m256i a, __m256i b)
|
_mm256_hsubs_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_maddubs_epi16(__m256i a, __m256i b)
|
_mm256_maddubs_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_madd_epi16(__m256i a, __m256i b)
|
_mm256_madd_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_max_epi8(__m256i a, __m256i b)
|
_mm256_max_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_max_epi16(__m256i a, __m256i b)
|
_mm256_max_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_max_epi32(__m256i a, __m256i b)
|
_mm256_max_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmaxsd256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_max_epu8(__m256i a, __m256i b)
|
_mm256_max_epu8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmaxub256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_max_epu16(__m256i a, __m256i b)
|
_mm256_max_epu16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_max_epu32(__m256i a, __m256i b)
|
_mm256_max_epu32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmaxud256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_min_epi8(__m256i a, __m256i b)
|
_mm256_min_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pminsb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_min_epi16(__m256i a, __m256i b)
|
_mm256_min_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pminsw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_min_epi32(__m256i a, __m256i b)
|
_mm256_min_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pminsd256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_min_epu8(__m256i a, __m256i b)
|
_mm256_min_epu8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pminub256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_min_epu16(__m256i a, __m256i b)
|
_mm256_min_epu16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_min_epu32(__m256i a, __m256i b)
|
_mm256_min_epu32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pminud256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_movemask_epi8(__m256i a)
|
_mm256_movemask_epi8(__m256i __a)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_pmovmskb256((__v32qi)a);
|
return __builtin_ia32_pmovmskb256((__v32qi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
@ -406,63 +406,63 @@ _mm256_cvtepu32_epi64(__m128i __V)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_mul_epi32(__m256i a, __m256i b)
|
_mm256_mul_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_mulhrs_epi16(__m256i a, __m256i b)
|
_mm256_mulhrs_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_mulhi_epu16(__m256i a, __m256i b)
|
_mm256_mulhi_epu16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_mulhi_epi16(__m256i a, __m256i b)
|
_mm256_mulhi_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_mullo_epi16(__m256i a, __m256i b)
|
_mm256_mullo_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v16hi)a * (__v16hi)b);
|
return (__m256i)((__v16hi)__a * (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_mullo_epi32 (__m256i a, __m256i b)
|
_mm256_mullo_epi32 (__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v8si)a * (__v8si)b);
|
return (__m256i)((__v8si)__a * (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_mul_epu32(__m256i a, __m256i b)
|
_mm256_mul_epu32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_pmuludq256((__v8si)a, (__v8si)b);
|
return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_or_si256(__m256i a, __m256i b)
|
_mm256_or_si256(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return a | b;
|
return __a | __b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sad_epu8(__m256i a, __m256i b)
|
_mm256_sad_epu8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_psadbw256((__v32qi)a, (__v32qi)b);
|
return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_shuffle_epi8(__m256i a, __m256i b)
|
_mm256_shuffle_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pshufb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
|
#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \
|
||||||
@ -502,21 +502,21 @@ _mm256_shuffle_epi8(__m256i a, __m256i b)
|
|||||||
12, 13, 14, 15); })
|
12, 13, 14, 15); })
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sign_epi8(__m256i a, __m256i b)
|
_mm256_sign_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psignb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sign_epi16(__m256i a, __m256i b)
|
_mm256_sign_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psignw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sign_epi32(__m256i a, __m256i b)
|
_mm256_sign_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psignd256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm256_slli_si256(a, count) __extension__ ({ \
|
#define _mm256_slli_si256(a, count) __extension__ ({ \
|
||||||
@ -524,63 +524,63 @@ _mm256_sign_epi32(__m256i a, __m256i b)
|
|||||||
(__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); })
|
(__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); })
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_slli_epi16(__m256i a, int count)
|
_mm256_slli_epi16(__m256i __a, int __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psllwi256((__v16hi)a, count);
|
return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sll_epi16(__m256i a, __m128i count)
|
_mm256_sll_epi16(__m256i __a, __m128i __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psllw256((__v16hi)a, (__v8hi)count);
|
return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_slli_epi32(__m256i a, int count)
|
_mm256_slli_epi32(__m256i __a, int __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pslldi256((__v8si)a, count);
|
return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sll_epi32(__m256i a, __m128i count)
|
_mm256_sll_epi32(__m256i __a, __m128i __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_pslld256((__v8si)a, (__v4si)count);
|
return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_slli_epi64(__m256i a, int count)
|
_mm256_slli_epi64(__m256i __a, int __count)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_psllqi256(a, count);
|
return __builtin_ia32_psllqi256(__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sll_epi64(__m256i a, __m128i count)
|
_mm256_sll_epi64(__m256i __a, __m128i __count)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_psllq256(a, count);
|
return __builtin_ia32_psllq256(__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_srai_epi16(__m256i a, int count)
|
_mm256_srai_epi16(__m256i __a, int __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psrawi256((__v16hi)a, count);
|
return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sra_epi16(__m256i a, __m128i count)
|
_mm256_sra_epi16(__m256i __a, __m128i __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psraw256((__v16hi)a, (__v8hi)count);
|
return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_srai_epi32(__m256i a, int count)
|
_mm256_srai_epi32(__m256i __a, int __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psradi256((__v8si)a, count);
|
return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sra_epi32(__m256i a, __m128i count)
|
_mm256_sra_epi32(__m256i __a, __m128i __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psrad256((__v8si)a, (__v4si)count);
|
return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm256_srli_si256(a, count) __extension__ ({ \
|
#define _mm256_srli_si256(a, count) __extension__ ({ \
|
||||||
@ -588,141 +588,141 @@ _mm256_sra_epi32(__m256i a, __m128i count)
|
|||||||
(__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); })
|
(__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); })
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_srli_epi16(__m256i a, int count)
|
_mm256_srli_epi16(__m256i __a, int __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psrlwi256((__v16hi)a, count);
|
return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_srl_epi16(__m256i a, __m128i count)
|
_mm256_srl_epi16(__m256i __a, __m128i __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psrlw256((__v16hi)a, (__v8hi)count);
|
return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_srli_epi32(__m256i a, int count)
|
_mm256_srli_epi32(__m256i __a, int __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psrldi256((__v8si)a, count);
|
return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_srl_epi32(__m256i a, __m128i count)
|
_mm256_srl_epi32(__m256i __a, __m128i __count)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psrld256((__v8si)a, (__v4si)count);
|
return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_srli_epi64(__m256i a, int count)
|
_mm256_srli_epi64(__m256i __a, int __count)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_psrlqi256(a, count);
|
return __builtin_ia32_psrlqi256(__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_srl_epi64(__m256i a, __m128i count)
|
_mm256_srl_epi64(__m256i __a, __m128i __count)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_psrlq256(a, count);
|
return __builtin_ia32_psrlq256(__a, __count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sub_epi8(__m256i a, __m256i b)
|
_mm256_sub_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v32qi)a - (__v32qi)b);
|
return (__m256i)((__v32qi)__a - (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sub_epi16(__m256i a, __m256i b)
|
_mm256_sub_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v16hi)a - (__v16hi)b);
|
return (__m256i)((__v16hi)__a - (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sub_epi32(__m256i a, __m256i b)
|
_mm256_sub_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)((__v8si)a - (__v8si)b);
|
return (__m256i)((__v8si)__a - (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_sub_epi64(__m256i a, __m256i b)
|
_mm256_sub_epi64(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return a - b;
|
return __a - __b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_subs_epi8(__m256i a, __m256i b)
|
_mm256_subs_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psubsb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_subs_epi16(__m256i a, __m256i b)
|
_mm256_subs_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psubsw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_subs_epu8(__m256i a, __m256i b)
|
_mm256_subs_epu8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psubusb256((__v32qi)a, (__v32qi)b);
|
return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_subs_epu16(__m256i a, __m256i b)
|
_mm256_subs_epu16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_psubusw256((__v16hi)a, (__v16hi)b);
|
return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_unpackhi_epi8(__m256i a, __m256i b)
|
_mm256_unpackhi_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
|
return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_unpackhi_epi16(__m256i a, __m256i b)
|
_mm256_unpackhi_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
|
return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_unpackhi_epi32(__m256i a, __m256i b)
|
_mm256_unpackhi_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
|
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_unpackhi_epi64(__m256i a, __m256i b)
|
_mm256_unpackhi_epi64(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_shufflevector(a, b, 1, 4+1, 3, 4+3);
|
return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_unpacklo_epi8(__m256i a, __m256i b)
|
_mm256_unpacklo_epi8(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_shufflevector((__v32qi)a, (__v32qi)b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
|
return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_unpacklo_epi16(__m256i a, __m256i b)
|
_mm256_unpacklo_epi16(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_shufflevector((__v16hi)a, (__v16hi)b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
|
return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_unpacklo_epi32(__m256i a, __m256i b)
|
_mm256_unpacklo_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_shufflevector((__v8si)a, (__v8si)b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
|
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_unpacklo_epi64(__m256i a, __m256i b)
|
_mm256_unpacklo_epi64(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_shufflevector(a, b, 0, 4+0, 2, 4+2);
|
return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_xor_si256(__m256i a, __m256i b)
|
_mm256_xor_si256(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return a ^ b;
|
return __a ^ __b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
@ -750,9 +750,9 @@ _mm256_broadcastsd_pd(__m128d __X)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_broadcastsi128_si256(__m128i const *a)
|
_mm_broadcastsi128_si256(__m128i const *__a)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_vbroadcastsi256(a);
|
return (__m256i)__builtin_ia32_vbroadcastsi256(__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
|
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
|
||||||
@ -815,9 +815,9 @@ _mm_broadcastq_epi64(__m128i __X)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_permutevar8x32_epi32(__m256i a, __m256i b)
|
_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
|
||||||
{
|
{
|
||||||
return (__m256i)__builtin_ia32_permvarsi256((__v8si)a, (__v8si)b);
|
return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
|
#define _mm256_permute4x64_pd(V, M) __extension__ ({ \
|
||||||
@ -827,9 +827,9 @@ _mm256_permutevar8x32_epi32(__m256i a, __m256i b)
|
|||||||
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
|
((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); })
|
||||||
|
|
||||||
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_permutevar8x32_ps(__m256 a, __m256 b)
|
_mm256_permutevar8x32_ps(__m256 __a, __m256 __b)
|
||||||
{
|
{
|
||||||
return (__m256)__builtin_ia32_permvarsf256((__v8sf)a, (__v8sf)b);
|
return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
|
#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -25,9 +25,10 @@
|
|||||||
#error this header is for x86 only
|
#error this header is for x86 only
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline int __get_cpuid (unsigned int level, unsigned int *eax,
|
static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
|
||||||
unsigned int *ebx, unsigned int *ecx,
|
unsigned int *__ebx, unsigned int *__ecx,
|
||||||
unsigned int *edx) {
|
unsigned int *__edx) {
|
||||||
__asm("cpuid" : "=a"(*eax), "=b" (*ebx), "=c"(*ecx), "=d"(*edx) : "0"(level));
|
__asm("cpuid" : "=a"(*__eax), "=b" (*__ebx), "=c"(*__ecx), "=d"(*__edx)
|
||||||
|
: "0"(__level));
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
/*===---- f16cintrin.h - F16C intrinsics ---------------------------------===
|
/*===---- f16cintrin.h - F16C intrinsics ---------------------------------===
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
* in the Software without restriction, including without limitation the rights
|
* in the Software without restriction, including without limitation the rights
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
@ -44,15 +44,15 @@ typedef float __m256 __attribute__ ((__vector_size__ (32)));
|
|||||||
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
|
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
|
||||||
|
|
||||||
static __inline __m128 __attribute__((__always_inline__, __nodebug__))
|
static __inline __m128 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_cvtph_ps(__m128i a)
|
_mm_cvtph_ps(__m128i __a)
|
||||||
{
|
{
|
||||||
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)a);
|
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm256_cvtph_ps(__m128i a)
|
_mm256_cvtph_ps(__m128i __a)
|
||||||
{
|
{
|
||||||
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)a);
|
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __F16CINTRIN_H */
|
#endif /* __F16CINTRIN_H */
|
||||||
|
@ -102,4 +102,13 @@ _rdrand64_step(unsigned long long *__p)
|
|||||||
#include <rtmintrin.h>
|
#include <rtmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* FIXME: check __HLE__ as well when HLE is supported. */
|
||||||
|
#if defined (__RTM__)
|
||||||
|
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||||
|
_xtest(void)
|
||||||
|
{
|
||||||
|
return __builtin_ia32_xtest();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* __IMMINTRIN_H */
|
#endif /* __IMMINTRIN_H */
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
#define _MM3DNOW_H_INCLUDED
|
#define _MM3DNOW_H_INCLUDED
|
||||||
|
|
||||||
#include <mmintrin.h>
|
#include <mmintrin.h>
|
||||||
|
#include <prfchwintrin.h>
|
||||||
|
|
||||||
typedef float __v2sf __attribute__((__vector_size__(8)));
|
typedef float __v2sf __attribute__((__vector_size__(8)));
|
||||||
|
|
||||||
|
@ -30,45 +30,45 @@
|
|||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#else
|
#else
|
||||||
#ifndef __cplusplus
|
#ifndef __cplusplus
|
||||||
extern int posix_memalign(void **memptr, size_t alignment, size_t size);
|
extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
|
||||||
#else
|
#else
|
||||||
// Some systems (e.g. those with GNU libc) declare posix_memalign with an
|
// Some systems (e.g. those with GNU libc) declare posix_memalign with an
|
||||||
// exception specifier. Via an "egregious workaround" in
|
// exception specifier. Via an "egregious workaround" in
|
||||||
// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid
|
// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid
|
||||||
// redeclaration of glibc's declaration.
|
// redeclaration of glibc's declaration.
|
||||||
extern "C" int posix_memalign(void **memptr, size_t alignment, size_t size);
|
extern "C" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !(defined(_WIN32) && defined(_mm_malloc))
|
#if !(defined(_WIN32) && defined(_mm_malloc))
|
||||||
static __inline__ void *__attribute__((__always_inline__, __nodebug__,
|
static __inline__ void *__attribute__((__always_inline__, __nodebug__,
|
||||||
__malloc__))
|
__malloc__))
|
||||||
_mm_malloc(size_t size, size_t align)
|
_mm_malloc(size_t __size, size_t __align)
|
||||||
{
|
{
|
||||||
if (align == 1) {
|
if (__align == 1) {
|
||||||
return malloc(size);
|
return malloc(__size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(align & (align - 1)) && align < sizeof(void *))
|
if (!(__align & (__align - 1)) && __align < sizeof(void *))
|
||||||
align = sizeof(void *);
|
__align = sizeof(void *);
|
||||||
|
|
||||||
void *mallocedMemory;
|
void *__mallocedMemory;
|
||||||
#if defined(__MINGW32__)
|
#if defined(__MINGW32__)
|
||||||
mallocedMemory = __mingw_aligned_malloc(size, align);
|
__mallocedMemory = __mingw_aligned_malloc(__size, __align);
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
mallocedMemory = _aligned_malloc(size, align);
|
__mallocedMemory = _aligned_malloc(__size, __align);
|
||||||
#else
|
#else
|
||||||
if (posix_memalign(&mallocedMemory, align, size))
|
if (posix_memalign(&__mallocedMemory, __align, __size))
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return mallocedMemory;
|
return __mallocedMemory;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_free(void *p)
|
_mm_free(void *__p)
|
||||||
{
|
{
|
||||||
free(p);
|
free(__p);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@ module _Builtin_intrinsics [system] {
|
|||||||
}
|
}
|
||||||
|
|
||||||
explicit module cpuid {
|
explicit module cpuid {
|
||||||
|
requires x86
|
||||||
header "cpuid.h"
|
header "cpuid.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -33,7 +34,6 @@ module _Builtin_intrinsics [system] {
|
|||||||
explicit module sse {
|
explicit module sse {
|
||||||
requires sse
|
requires sse
|
||||||
export mmx
|
export mmx
|
||||||
export * // note: for hackish <emmintrin.h> dependency
|
|
||||||
header "xmmintrin.h"
|
header "xmmintrin.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,65 +31,65 @@
|
|||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_lddqu_si128(__m128i const *p)
|
_mm_lddqu_si128(__m128i const *__p)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_lddqu((char const *)p);
|
return (__m128i)__builtin_ia32_lddqu((char const *)__p);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_addsub_ps(__m128 a, __m128 b)
|
_mm_addsub_ps(__m128 __a, __m128 __b)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_addsubps(a, b);
|
return __builtin_ia32_addsubps(__a, __b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hadd_ps(__m128 a, __m128 b)
|
_mm_hadd_ps(__m128 __a, __m128 __b)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_haddps(a, b);
|
return __builtin_ia32_haddps(__a, __b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hsub_ps(__m128 a, __m128 b)
|
_mm_hsub_ps(__m128 __a, __m128 __b)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_hsubps(a, b);
|
return __builtin_ia32_hsubps(__a, __b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_movehdup_ps(__m128 a)
|
_mm_movehdup_ps(__m128 __a)
|
||||||
{
|
{
|
||||||
return __builtin_shufflevector(a, a, 1, 1, 3, 3);
|
return __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_moveldup_ps(__m128 a)
|
_mm_moveldup_ps(__m128 __a)
|
||||||
{
|
{
|
||||||
return __builtin_shufflevector(a, a, 0, 0, 2, 2);
|
return __builtin_shufflevector(__a, __a, 0, 0, 2, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_addsub_pd(__m128d a, __m128d b)
|
_mm_addsub_pd(__m128d __a, __m128d __b)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_addsubpd(a, b);
|
return __builtin_ia32_addsubpd(__a, __b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hadd_pd(__m128d a, __m128d b)
|
_mm_hadd_pd(__m128d __a, __m128d __b)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_haddpd(a, b);
|
return __builtin_ia32_haddpd(__a, __b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hsub_pd(__m128d a, __m128d b)
|
_mm_hsub_pd(__m128d __a, __m128d __b)
|
||||||
{
|
{
|
||||||
return __builtin_ia32_hsubpd(a, b);
|
return __builtin_ia32_hsubpd(__a, __b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
|
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
|
||||||
|
|
||||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_movedup_pd(__m128d a)
|
_mm_movedup_pd(__m128d __a)
|
||||||
{
|
{
|
||||||
return __builtin_shufflevector(a, a, 0, 0);
|
return __builtin_shufflevector(__a, __a, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _MM_DENORMALS_ZERO_ON (0x0040)
|
#define _MM_DENORMALS_ZERO_ON (0x0040)
|
||||||
@ -101,15 +101,15 @@ _mm_movedup_pd(__m128d a)
|
|||||||
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
|
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
|
||||||
|
|
||||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_monitor(void const *p, unsigned extensions, unsigned hints)
|
_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
|
||||||
{
|
{
|
||||||
__builtin_ia32_monitor((void *)p, extensions, hints);
|
__builtin_ia32_monitor((void *)__p, __extensions, __hints);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_mwait(unsigned extensions, unsigned hints)
|
_mm_mwait(unsigned __extensions, unsigned __hints)
|
||||||
{
|
{
|
||||||
__builtin_ia32_mwait(extensions, hints);
|
__builtin_ia32_mwait(__extensions, __hints);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __SSE3__ */
|
#endif /* __SSE3__ */
|
||||||
|
34
python/clang_includes/prfchwintrin.h
Normal file
34
python/clang_includes/prfchwintrin.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*===-----------------------------------------------------------------------===
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
|
||||||
|
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__PRFCHW__) || defined(__3dNOW__)
|
||||||
|
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||||
|
_m_prefetchw(void *__P)
|
||||||
|
{
|
||||||
|
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
|
||||||
|
}
|
||||||
|
#endif
|
48
python/clang_includes/rdseedintrin.h
Normal file
48
python/clang_includes/rdseedintrin.h
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*===-----------------------------------------------------------------------===
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __X86INTRIN_H
|
||||||
|
#error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __RDSEED__
|
||||||
|
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||||
|
_rdseed16_step(unsigned short *__p)
|
||||||
|
{
|
||||||
|
return __builtin_ia32_rdseed16_step(__p);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||||
|
_rdseed32_step(unsigned int *__p)
|
||||||
|
{
|
||||||
|
return __builtin_ia32_rdseed32_step(__p);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __x86_64__
|
||||||
|
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||||
|
_rdseed64_step(unsigned long long *__p)
|
||||||
|
{
|
||||||
|
return __builtin_ia32_rdseed64_step(__p);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif /* __RDSEED__ */
|
@ -195,10 +195,10 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
|||||||
/* SSE4 Insertion and Extraction from XMM Register Instructions. */
|
/* SSE4 Insertion and Extraction from XMM Register Instructions. */
|
||||||
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
|
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
|
||||||
#define _mm_extract_ps(X, N) (__extension__ \
|
#define _mm_extract_ps(X, N) (__extension__ \
|
||||||
({ union { int i; float f; } __t; \
|
({ union { int __i; float __f; } __t; \
|
||||||
__v4sf __a = (__v4sf)(X); \
|
__v4sf __a = (__v4sf)(X); \
|
||||||
__t.f = __a[N]; \
|
__t.__f = __a[N]; \
|
||||||
__t.i;}))
|
__t.__i;}))
|
||||||
|
|
||||||
/* Miscellaneous insert and extract macros. */
|
/* Miscellaneous insert and extract macros. */
|
||||||
/* Extract a single-precision float from X at index N into D. */
|
/* Extract a single-precision float from X at index N into D. */
|
||||||
|
@ -24,7 +24,12 @@
|
|||||||
#ifndef __STDALIGN_H
|
#ifndef __STDALIGN_H
|
||||||
#define __STDALIGN_H
|
#define __STDALIGN_H
|
||||||
|
|
||||||
|
#ifndef __cplusplus
|
||||||
#define alignas _Alignas
|
#define alignas _Alignas
|
||||||
|
#define alignof _Alignof
|
||||||
|
#endif
|
||||||
|
|
||||||
#define __alignas_is_defined 1
|
#define __alignas_is_defined 1
|
||||||
|
#define __alignof_is_defined 1
|
||||||
|
|
||||||
#endif /* __STDALIGN_H */
|
#endif /* __STDALIGN_H */
|
||||||
|
@ -26,17 +26,42 @@
|
|||||||
#ifndef __STDDEF_H
|
#ifndef __STDDEF_H
|
||||||
#define __STDDEF_H
|
#define __STDDEF_H
|
||||||
|
|
||||||
#ifndef _PTRDIFF_T
|
#if !defined(_PTRDIFF_T) || __has_feature(modules)
|
||||||
|
/* Always define ptrdiff_t when modules are available. */
|
||||||
|
#if !__has_feature(modules)
|
||||||
#define _PTRDIFF_T
|
#define _PTRDIFF_T
|
||||||
typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t;
|
|
||||||
#endif
|
#endif
|
||||||
#ifndef _SIZE_T
|
typedef __PTRDIFF_TYPE__ ptrdiff_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(_SIZE_T) || __has_feature(modules)
|
||||||
|
/* Always define size_t when modules are available. */
|
||||||
|
#if !__has_feature(modules)
|
||||||
#define _SIZE_T
|
#define _SIZE_T
|
||||||
typedef __typeof__(sizeof(int)) size_t;
|
|
||||||
#endif
|
#endif
|
||||||
|
typedef __SIZE_TYPE__ size_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
|
||||||
|
* enabled. */
|
||||||
|
#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \
|
||||||
|
!defined(_RSIZE_T)) || __has_feature(modules)
|
||||||
|
/* Always define rsize_t when modules are available. */
|
||||||
|
#if !__has_feature(modules)
|
||||||
|
#define _RSIZE_T
|
||||||
|
#endif
|
||||||
|
typedef __SIZE_TYPE__ rsize_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef __cplusplus
|
#ifndef __cplusplus
|
||||||
#ifndef _WCHAR_T
|
/* Always define wchar_t when modules are available. */
|
||||||
|
#if !defined(_WCHAR_T) || __has_feature(modules)
|
||||||
|
#if !__has_feature(modules)
|
||||||
#define _WCHAR_T
|
#define _WCHAR_T
|
||||||
|
#if defined(_MSC_EXTENSIONS)
|
||||||
|
#define _WCHAR_T_DEFINED
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
typedef __WCHAR_TYPE__ wchar_t;
|
typedef __WCHAR_TYPE__ wchar_t;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
@ -66,9 +91,12 @@ using ::std::nullptr_t;
|
|||||||
/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
|
/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
|
||||||
__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
|
__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
|
||||||
#if defined(__need_wint_t)
|
#if defined(__need_wint_t)
|
||||||
#if !defined(_WINT_T)
|
/* Always define wint_t when modules are available. */
|
||||||
|
#if !defined(_WINT_T) || __has_feature(modules)
|
||||||
|
#if !__has_feature(modules)
|
||||||
#define _WINT_T
|
#define _WINT_T
|
||||||
|
#endif
|
||||||
typedef __WINT_TYPE__ wint_t;
|
typedef __WINT_TYPE__ wint_t;
|
||||||
#endif /* _WINT_T */
|
#endif
|
||||||
#undef __need_wint_t
|
#undef __need_wint_t
|
||||||
#endif /* __need_wint_t */
|
#endif /* __need_wint_t */
|
||||||
|
@ -30,7 +30,48 @@
|
|||||||
*/
|
*/
|
||||||
#if __STDC_HOSTED__ && \
|
#if __STDC_HOSTED__ && \
|
||||||
defined(__has_include_next) && __has_include_next(<stdint.h>)
|
defined(__has_include_next) && __has_include_next(<stdint.h>)
|
||||||
|
|
||||||
|
// C99 7.18.3 Limits of other integer types
|
||||||
|
//
|
||||||
|
// Footnote 219, 220: C++ implementations should define these macros only when
|
||||||
|
// __STDC_LIMIT_MACROS is defined before <stdint.h> is included.
|
||||||
|
//
|
||||||
|
// Footnote 222: C++ implementations should define these macros only when
|
||||||
|
// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included.
|
||||||
|
//
|
||||||
|
// C++11 [cstdint.syn]p2:
|
||||||
|
//
|
||||||
|
// The macros defined by <cstdint> are provided unconditionally. In particular,
|
||||||
|
// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in
|
||||||
|
// footnotes 219, 220, and 222 in the C standard) play no role in C++.
|
||||||
|
//
|
||||||
|
// C11 removed the problematic footnotes.
|
||||||
|
//
|
||||||
|
// Work around this inconsistency by always defining those macros in C++ mode,
|
||||||
|
// so that a C library implementation which follows the C99 standard can be
|
||||||
|
// used in C++.
|
||||||
|
# ifdef __cplusplus
|
||||||
|
# if !defined(__STDC_LIMIT_MACROS)
|
||||||
|
# define __STDC_LIMIT_MACROS
|
||||||
|
# define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
|
||||||
|
# endif
|
||||||
|
# if !defined(__STDC_CONSTANT_MACROS)
|
||||||
|
# define __STDC_CONSTANT_MACROS
|
||||||
|
# define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
|
||||||
# include_next <stdint.h>
|
# include_next <stdint.h>
|
||||||
|
|
||||||
|
# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
|
||||||
|
# undef __STDC_LIMIT_MACROS
|
||||||
|
# undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG
|
||||||
|
# endif
|
||||||
|
# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
|
||||||
|
# undef __STDC_CONSTANT_MACROS
|
||||||
|
# undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG
|
||||||
|
# endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
/* C99 7.18.1.1 Exact-width integer types.
|
/* C99 7.18.1.1 Exact-width integer types.
|
||||||
@ -626,6 +667,12 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
|||||||
#define PTRDIFF_MAX __INTN_MAX(__PTRDIFF_WIDTH__)
|
#define PTRDIFF_MAX __INTN_MAX(__PTRDIFF_WIDTH__)
|
||||||
#define SIZE_MAX __UINTN_MAX(__SIZE_WIDTH__)
|
#define SIZE_MAX __UINTN_MAX(__SIZE_WIDTH__)
|
||||||
|
|
||||||
|
/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__
|
||||||
|
* is enabled. */
|
||||||
|
#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1
|
||||||
|
#define RSIZE_MAX (SIZE_MAX >> 1)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* C99 7.18.2.5 Limits of greatest-width integer types. */
|
/* C99 7.18.2.5 Limits of greatest-width integer types. */
|
||||||
#define INTMAX_MIN __INTN_MIN(__INTMAX_WIDTH__)
|
#define INTMAX_MIN __INTN_MIN(__INTMAX_WIDTH__)
|
||||||
#define INTMAX_MAX __INTN_MAX(__INTMAX_WIDTH__)
|
#define INTMAX_MAX __INTN_MAX(__INTMAX_WIDTH__)
|
||||||
|
30
python/clang_includes/stdnoreturn.h
Normal file
30
python/clang_includes/stdnoreturn.h
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------===
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*===-----------------------------------------------------------------------===
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __STDNORETURN_H
|
||||||
|
#define __STDNORETURN_H
|
||||||
|
|
||||||
|
#define noreturn _Noreturn
|
||||||
|
#define __noreturn_is_defined 1
|
||||||
|
|
||||||
|
#endif /* __STDNORETURN_H */
|
@ -31,39 +31,39 @@
|
|||||||
#include <pmmintrin.h>
|
#include <pmmintrin.h>
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_abs_pi8(__m64 a)
|
_mm_abs_pi8(__m64 __a)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_pabsb((__v8qi)a);
|
return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_abs_epi8(__m128i a)
|
_mm_abs_epi8(__m128i __a)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_pabsb128((__v16qi)a);
|
return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_abs_pi16(__m64 a)
|
_mm_abs_pi16(__m64 __a)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_pabsw((__v4hi)a);
|
return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_abs_epi16(__m128i a)
|
_mm_abs_epi16(__m128i __a)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_pabsw128((__v8hi)a);
|
return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_abs_pi32(__m64 a)
|
_mm_abs_pi32(__m64 __a)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_pabsd((__v2si)a);
|
return (__m64)__builtin_ia32_pabsd((__v2si)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_abs_epi32(__m128i a)
|
_mm_abs_epi32(__m128i __a)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_pabsd128((__v4si)a);
|
return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
|
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
|
||||||
@ -77,147 +77,147 @@ _mm_abs_epi32(__m128i a)
|
|||||||
(__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
|
(__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hadd_epi16(__m128i a, __m128i b)
|
_mm_hadd_epi16(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b);
|
return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hadd_epi32(__m128i a, __m128i b)
|
_mm_hadd_epi32(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b);
|
return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hadd_pi16(__m64 a, __m64 b)
|
_mm_hadd_pi16(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b);
|
return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hadd_pi32(__m64 a, __m64 b)
|
_mm_hadd_pi32(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b);
|
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hadds_epi16(__m128i a, __m128i b)
|
_mm_hadds_epi16(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b);
|
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hadds_pi16(__m64 a, __m64 b)
|
_mm_hadds_pi16(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b);
|
return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hsub_epi16(__m128i a, __m128i b)
|
_mm_hsub_epi16(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b);
|
return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hsub_epi32(__m128i a, __m128i b)
|
_mm_hsub_epi32(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b);
|
return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hsub_pi16(__m64 a, __m64 b)
|
_mm_hsub_pi16(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b);
|
return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hsub_pi32(__m64 a, __m64 b)
|
_mm_hsub_pi32(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b);
|
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hsubs_epi16(__m128i a, __m128i b)
|
_mm_hsubs_epi16(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b);
|
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_hsubs_pi16(__m64 a, __m64 b)
|
_mm_hsubs_pi16(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b);
|
return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_maddubs_epi16(__m128i a, __m128i b)
|
_mm_maddubs_epi16(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b);
|
return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_maddubs_pi16(__m64 a, __m64 b)
|
_mm_maddubs_pi16(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b);
|
return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_mulhrs_epi16(__m128i a, __m128i b)
|
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b);
|
return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_mulhrs_pi16(__m64 a, __m64 b)
|
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b);
|
return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_shuffle_epi8(__m128i a, __m128i b)
|
_mm_shuffle_epi8(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b);
|
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_shuffle_pi8(__m64 a, __m64 b)
|
_mm_shuffle_pi8(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b);
|
return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_sign_epi8(__m128i a, __m128i b)
|
_mm_sign_epi8(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b);
|
return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_sign_epi16(__m128i a, __m128i b)
|
_mm_sign_epi16(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b);
|
return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_sign_epi32(__m128i a, __m128i b)
|
_mm_sign_epi32(__m128i __a, __m128i __b)
|
||||||
{
|
{
|
||||||
return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b);
|
return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_sign_pi8(__m64 a, __m64 b)
|
_mm_sign_pi8(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b);
|
return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_sign_pi16(__m64 a, __m64 b)
|
_mm_sign_pi16(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b);
|
return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_sign_pi32(__m64 a, __m64 b)
|
_mm_sign_pi32(__m64 __a, __m64 __b)
|
||||||
{
|
{
|
||||||
return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b);
|
return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* __SSSE3__ */
|
#endif /* __SSSE3__ */
|
||||||
|
@ -23,6 +23,9 @@
|
|||||||
|
|
||||||
/* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/
|
/* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/
|
||||||
|
|
||||||
|
#ifndef __CLANG_UNWIND_H
|
||||||
|
#define __CLANG_UNWIND_H
|
||||||
|
|
||||||
#if __has_include_next(<unwind.h>)
|
#if __has_include_next(<unwind.h>)
|
||||||
/* Darwin and libunwind provide an unwind.h. If that's available, use
|
/* Darwin and libunwind provide an unwind.h. If that's available, use
|
||||||
* it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
|
* it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
|
||||||
@ -59,7 +62,9 @@ extern "C" {
|
|||||||
/* It is a bit strange for a header to play with the visibility of the
|
/* It is a bit strange for a header to play with the visibility of the
|
||||||
symbols it declares, but this matches gcc's behavior and some programs
|
symbols it declares, but this matches gcc's behavior and some programs
|
||||||
depend on it */
|
depend on it */
|
||||||
|
#ifndef HIDE_EXPORTS
|
||||||
#pragma GCC visibility push(default)
|
#pragma GCC visibility push(default)
|
||||||
|
#endif
|
||||||
|
|
||||||
struct _Unwind_Context;
|
struct _Unwind_Context;
|
||||||
typedef enum {
|
typedef enum {
|
||||||
@ -100,25 +105,29 @@ typedef enum {
|
|||||||
_UVRSR_FAILED = 2
|
_UVRSR_FAILED = 2
|
||||||
} _Unwind_VRS_Result;
|
} _Unwind_VRS_Result;
|
||||||
|
|
||||||
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *context,
|
_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
|
||||||
_Unwind_VRS_RegClass regclass,
|
_Unwind_VRS_RegClass __regclass,
|
||||||
uint32_t regno,
|
uint32_t __regno,
|
||||||
_Unwind_VRS_DataRepresentation representation,
|
_Unwind_VRS_DataRepresentation __representation,
|
||||||
void *valuep);
|
void *__valuep);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
uintptr_t _Unwind_GetIP(struct _Unwind_Context* context);
|
uintptr_t _Unwind_GetIP(struct _Unwind_Context* __context);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context*, void*);
|
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context*, void*);
|
||||||
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void*);
|
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void*);
|
||||||
|
|
||||||
|
#ifndef HIDE_EXPORTS
|
||||||
#pragma GCC visibility pop
|
#pragma GCC visibility pop
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif /* __CLANG_UNWIND_H */
|
||||||
|
@ -46,6 +46,14 @@
|
|||||||
#include <popcntintrin.h>
|
#include <popcntintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __RDSEED__
|
||||||
|
#include <rdseedintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __PRFCHW__
|
||||||
|
#include <prfchwintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __SSE4A__
|
#ifdef __SSE4A__
|
||||||
#include <ammintrin.h>
|
#include <ammintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
|||||||
/*===---- xopintrin.h - FMA4 intrinsics ------------------------------------===
|
/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
@ -22,7 +22,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __X86INTRIN_H
|
#ifndef __X86INTRIN_H
|
||||||
#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
|
#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef __XOPINTRIN_H
|
#ifndef __XOPINTRIN_H
|
||||||
|
Loading…
Reference in New Issue
Block a user