Updating local clang includes to 3.2

These headers have to live beneath the folder where ycm_core.so is placed so that we get fast completions. If they are not there, clang fails to precompile the file preamble and completions are slow.
Strahinja Val Markovic 2013-01-13 12:49:49 -08:00
parent ccc890a634
commit fa21622655
41 changed files with 6290 additions and 35 deletions

wmmintrin.h -> __wmmintrin_aes.h

@@ -1,4 +1,4 @@
-/*===---- wmmintrin.h - AES intrinsics ------------------------------------===
+/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -20,16 +20,15 @@
  *
  *===-----------------------------------------------------------------------===
  */
-#ifndef _WMMINTRIN_H
-#define _WMMINTRIN_H
+#ifndef _WMMINTRIN_AES_H
+#define _WMMINTRIN_AES_H
+#include <emmintrin.h>
 #if !defined (__AES__)
 #  error "AES instructions not enabled"
 #else
-#include <xmmintrin.h>
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_aesenc_si128(__m128i __V, __m128i __R)
 {
@@ -63,5 +62,6 @@ _mm_aesimc_si128(__m128i __V)
 #define _mm_aeskeygenassist_si128(C, R) \
   __builtin_ia32_aeskeygenassist128((C), (R))
-#endif /* __AES__ */
-#endif /* _WMMINTRIN_H */
+#endif
+#endif /* _WMMINTRIN_AES_H */

__wmmintrin_pclmul.h (new file)

@@ -0,0 +1,34 @@
/*===---- __wmmintrin_pclmul.h - AES intrinsics ----------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef _WMMINTRIN_PCLMUL_H
#define _WMMINTRIN_PCLMUL_H
#if !defined (__PCLMUL__)
# error "PCLMUL instruction is not enabled"
#else
#define _mm_clmulepi64_si128(__X, __Y, __I) \
((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
(__v2di)(__m128i)(__Y), (char)(__I)))
#endif
#endif /* _WMMINTRIN_PCLMUL_H */
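
For orientation (not part of the commit), a minimal caller of the carry-less multiply macro defined above; this sketch assumes a build with -mpclmul on x86-64:

#include <wmmintrin.h>
#include <stdio.h>

int main(void)
{
  /* Multiply the low 64-bit halves as GF(2) polynomials; the
     immediate 0x00 selects the low qword of each operand. */
  __m128i x = _mm_set_epi64x(0, 0xb5);
  __m128i y = _mm_set_epi64x(0, 0x87);
  __m128i r = _mm_clmulepi64_si128(x, y, 0x00);
  printf("0x%llx\n", (unsigned long long)_mm_cvtsi128_si64(r));
  return 0;
}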

altivec.h

@@ -4363,14 +4363,14 @@ vec_perm(vector float a, vector float b, vector unsigned char c)
 /* vec_vperm */
-vector signed char __ATTRS_o_ai
+static vector signed char __ATTRS_o_ai
 vec_vperm(vector signed char a, vector signed char b, vector unsigned char c)
 {
   return (vector signed char)
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector unsigned char __ATTRS_o_ai
+static vector unsigned char __ATTRS_o_ai
 vec_vperm(vector unsigned char a,
           vector unsigned char b,
           vector unsigned char c)
@@ -4379,21 +4379,21 @@ vec_vperm(vector unsigned char a,
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector bool char __ATTRS_o_ai
+static vector bool char __ATTRS_o_ai
 vec_vperm(vector bool char a, vector bool char b, vector unsigned char c)
 {
   return (vector bool char)
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector short __ATTRS_o_ai
+static vector short __ATTRS_o_ai
 vec_vperm(vector short a, vector short b, vector unsigned char c)
 {
   return (vector short)
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector unsigned short __ATTRS_o_ai
+static vector unsigned short __ATTRS_o_ai
 vec_vperm(vector unsigned short a,
           vector unsigned short b,
           vector unsigned char c)
@@ -4402,41 +4402,41 @@ vec_vperm(vector unsigned short a,
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector bool short __ATTRS_o_ai
+static vector bool short __ATTRS_o_ai
 vec_vperm(vector bool short a, vector bool short b, vector unsigned char c)
 {
   return (vector bool short)
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector pixel __ATTRS_o_ai
+static vector pixel __ATTRS_o_ai
 vec_vperm(vector pixel a, vector pixel b, vector unsigned char c)
 {
   return (vector pixel)
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector int __ATTRS_o_ai
+static vector int __ATTRS_o_ai
 vec_vperm(vector int a, vector int b, vector unsigned char c)
 {
   return (vector int)__builtin_altivec_vperm_4si(a, b, c);
 }
-vector unsigned int __ATTRS_o_ai
+static vector unsigned int __ATTRS_o_ai
 vec_vperm(vector unsigned int a, vector unsigned int b, vector unsigned char c)
 {
   return (vector unsigned int)
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector bool int __ATTRS_o_ai
+static vector bool int __ATTRS_o_ai
 vec_vperm(vector bool int a, vector bool int b, vector unsigned char c)
 {
   return (vector bool int)
            __builtin_altivec_vperm_4si((vector int)a, (vector int)b, c);
 }
-vector float __ATTRS_o_ai
+static vector float __ATTRS_o_ai
 vec_vperm(vector float a, vector float b, vector unsigned char c)
 {
   return (vector float)
@@ -4445,7 +4445,7 @@ vec_vperm(vector float a, vector float b, vector unsigned char c)
 /* vec_re */
-vector float __attribute__((__always_inline__))
+static vector float __attribute__((__always_inline__))
 vec_re(vector float a)
 {
   return __builtin_altivec_vrefp(a);
@@ -4453,7 +4453,7 @@ vec_re(vector float a)
 /* vec_vrefp */
-vector float __attribute__((__always_inline__))
+static vector float __attribute__((__always_inline__))
 vec_vrefp(vector float a)
 {
   return __builtin_altivec_vrefp(a);

ammintrin.h (new file)

@@ -0,0 +1,68 @@
/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H
#ifndef __SSE4A__
#error "SSE4A instruction set not enabled"
#else
#include <pmmintrin.h>
#define _mm_extracti_si64(x, len, idx) \
((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
(char)(len), (char)(idx)))
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_extract_si64(__m128i __x, __m128i __y)
{
return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}
#define _mm_inserti_si64(x, y, len, idx) \
((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
(__v2di)(__m128i)(y), \
(char)(len), (char)(idx)))
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_insert_si64(__m128i __x, __m128i __y)
{
return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_sd(double *__p, __m128d __a)
{
__builtin_ia32_movntsd(__p, (__v2df)__a);
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_ss(float *__p, __m128 __a)
{
__builtin_ia32_movntss(__p, (__v4sf)__a);
}
#endif /* __SSE4A__ */
#endif /* __AMMINTRIN_H */
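
As a usage sketch (illustrative, not from the commit) for the SSE4a bit-field intrinsics above, assuming a -msse4a build:

#include <ammintrin.h>
#include <stdio.h>

int main(void)
{
  /* Extract 8 bits starting at bit 4 of the low qword:
     (0x12345678 >> 4) & 0xff == 0x67. */
  __m128i v = _mm_set_epi64x(0, 0x12345678);
  __m128i r = _mm_extracti_si64(v, 8, 4);
  printf("0x%llx\n", (unsigned long long)_mm_cvtsi128_si64(r));
  return 0;
}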

File diff suppressed because it is too large.

avx2intrin.h

@@ -959,3 +959,243 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
{
  return (__m128i)__builtin_ia32_psrlv2di(__X, __Y);
}
#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
__m128d __a = (a); \
double const *__m = (m); \
__m128i __i = (i); \
__m128d __mask = (mask); \
(__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \
(__v4si)__i, (__v2df)__mask, (s)); })
#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
__m256d __a = (a); \
double const *__m = (m); \
__m128i __i = (i); \
__m256d __mask = (mask); \
(__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \
(__v4si)__i, (__v4df)__mask, (s)); })
#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
__m128d __a = (a); \
double const *__m = (m); \
__m128i __i = (i); \
__m128d __mask = (mask); \
(__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \
(__v2di)__i, (__v2df)__mask, (s)); })
#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
__m256d __a = (a); \
double const *__m = (m); \
__m256i __i = (i); \
__m256d __mask = (mask); \
(__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \
(__v4di)__i, (__v4df)__mask, (s)); })
#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
__m128 __a = (a); \
float const *__m = (m); \
__m128i __i = (i); \
__m128 __mask = (mask); \
(__m128)__builtin_ia32_gatherd_ps((__v4sf)__a, (const __v4sf *)__m, \
(__v4si)__i, (__v4sf)__mask, (s)); })
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
__m256 __a = (a); \
float const *__m = (m); \
__m256i __i = (i); \
__m256 __mask = (mask); \
(__m256)__builtin_ia32_gatherd_ps256((__v8sf)__a, (const __v8sf *)__m, \
(__v8si)__i, (__v8sf)__mask, (s)); })
#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
__m128 __a = (a); \
float const *__m = (m); \
__m128i __i = (i); \
__m128 __mask = (mask); \
(__m128)__builtin_ia32_gatherq_ps((__v4sf)__a, (const __v4sf *)__m, \
(__v2di)__i, (__v4sf)__mask, (s)); })
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
__m128 __a = (a); \
float const *__m = (m); \
__m256i __i = (i); \
__m128 __mask = (mask); \
(__m128)__builtin_ia32_gatherq_ps256((__v4sf)__a, (const __v4sf *)__m, \
(__v4di)__i, (__v4sf)__mask, (s)); })
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
__m128i __a = (a); \
int const *__m = (m); \
__m128i __i = (i); \
__m128i __mask = (mask); \
(__m128i)__builtin_ia32_gatherd_d((__v4si)__a, (const __v4si *)__m, \
(__v4si)__i, (__v4si)__mask, (s)); })
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \
__m256i __a = (a); \
int const *__m = (m); \
__m256i __i = (i); \
__m256i __mask = (mask); \
(__m256i)__builtin_ia32_gatherd_d256((__v8si)__a, (const __v8si *)__m, \
(__v8si)__i, (__v8si)__mask, (s)); })
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
__m128i __a = (a); \
int const *__m = (m); \
__m128i __i = (i); \
__m128i __mask = (mask); \
(__m128i)__builtin_ia32_gatherq_d((__v4si)__a, (const __v4si *)__m, \
(__v2di)__i, (__v4si)__mask, (s)); })
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \
__m128i __a = (a); \
int const *__m = (m); \
__m256i __i = (i); \
__m128i __mask = (mask); \
(__m128i)__builtin_ia32_gatherq_d256((__v4si)__a, (const __v4si *)__m, \
(__v4di)__i, (__v4si)__mask, (s)); })
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
__m128i __a = (a); \
int const *__m = (m); \
__m128i __i = (i); \
__m128i __mask = (mask); \
(__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \
(__v4si)__i, (__v2di)__mask, (s)); })
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
__m256i __a = (a); \
int const *__m = (m); \
__m128i __i = (i); \
__m256i __mask = (mask); \
(__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \
(__v4si)__i, (__v4di)__mask, (s)); })
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
__m128i __a = (a); \
int const *__m = (m); \
__m128i __i = (i); \
__m128i __mask = (mask); \
(__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \
(__v2di)__i, (__v2di)__mask, (s)); })
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
__m256i __a = (a); \
int const *__m = (m); \
__m256i __i = (i); \
__m256i __mask = (mask); \
(__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \
(__v4di)__i, (__v4di)__mask, (s)); })
#define _mm_i32gather_pd(m, i, s) __extension__ ({ \
double const *__m = (m); \
__m128i __i = (i); \
(__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \
(const __v2df *)__m, (__v4si)__i, \
(__v2df)_mm_set1_pd((double)(long long int)-1), (s)); })
#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \
double const *__m = (m); \
__m128i __i = (i); \
(__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \
(const __v4df *)__m, (__v4si)__i, \
(__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); })
#define _mm_i64gather_pd(m, i, s) __extension__ ({ \
double const *__m = (m); \
__m128i __i = (i); \
(__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \
(const __v2df *)__m, (__v2di)__i, \
(__v2df)_mm_set1_pd((double)(long long int)-1), (s)); })
#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \
double const *__m = (m); \
__m256i __i = (i); \
(__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \
(const __v4df *)__m, (__v4di)__i, \
(__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); })
#define _mm_i32gather_ps(m, i, s) __extension__ ({ \
float const *__m = (m); \
__m128i __i = (i); \
(__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \
(const __v4sf *)__m, (__v4si)__i, \
(__v4sf)_mm_set1_ps((float)(int)-1), (s)); })
#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \
float const *__m = (m); \
__m256i __i = (i); \
(__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \
(const __v8sf *)__m, (__v8si)__i, \
(__v8sf)_mm256_set1_ps((float)(int)-1), (s)); })
#define _mm_i64gather_ps(m, i, s) __extension__ ({ \
float const *__m = (m); \
__m128i __i = (i); \
(__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \
(const __v4sf *)__m, (__v2di)__i, \
(__v4sf)_mm_set1_ps((float)(int)-1), (s)); })
#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \
float const *__m = (m); \
__m256i __i = (i); \
(__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \
(const __v4sf *)__m, (__v4di)__i, \
(__v4sf)_mm_set1_ps((float)(int)-1), (s)); })
#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \
int const *__m = (m); \
__m128i __i = (i); \
(__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \
(const __v4si *)__m, (__v4si)__i, \
(__v4si)_mm_set1_epi32(-1), (s)); })
#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \
int const *__m = (m); \
__m256i __i = (i); \
(__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \
(const __v8si *)__m, (__v8si)__i, \
(__v8si)_mm256_set1_epi32(-1), (s)); })
#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \
int const *__m = (m); \
__m128i __i = (i); \
(__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \
(const __v4si *)__m, (__v2di)__i, \
(__v4si)_mm_set1_epi32(-1), (s)); })
#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \
int const *__m = (m); \
__m256i __i = (i); \
(__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \
(const __v4si *)__m, (__v4di)__i, \
(__v4si)_mm_set1_epi32(-1), (s)); })
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
int const *__m = (m); \
__m128i __i = (i); \
(__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
(const __v2di *)__m, (__v4si)__i, \
(__v2di)_mm_set1_epi64x(-1), (s)); })
#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
int const *__m = (m); \
__m128i __i = (i); \
(__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
(const __v4di *)__m, (__v4si)__i, \
(__v4di)_mm256_set1_epi64x(-1), (s)); })
#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
int const *__m = (m); \
__m128i __i = (i); \
(__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \
(const __v2di *)__m, (__v2di)__i, \
(__v2di)_mm_set1_epi64x(-1), (s)); })
#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
int const *__m = (m); \
__m256i __i = (i); \
(__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \
(const __v4di *)__m, (__v4di)__i, \
(__v4di)_mm256_set1_epi64x(-1), (s)); })
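
To make the gather macro signatures above concrete, a small sketch (not part of the commit) that loads four ints through an index vector, with scale = sizeof(int); assumes -mavx2:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  int table[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  /* Element i of the result is table[idx[i]]; the scale argument
     (4 here) converts each index into a byte offset. */
  __m128i idx = _mm_set_epi32(7, 2, 5, 0);
  __m128i r = _mm_i32gather_epi32(table, idx, 4);
  int out[4];
  _mm_storeu_si128((__m128i *)out, r);
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 10 15 12 17 */
  return 0;
}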

bmi2intrin.h

@@ -70,6 +70,25 @@ _pext_u64(unsigned long long __X, unsigned long long __Y)
   return __builtin_ia32_pext_di(__X, __Y);
 }
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+_mulx_u64 (unsigned long long __X, unsigned long long __Y,
+           unsigned long long *__P)
+{
+  unsigned __int128 __res = (unsigned __int128) __X * __Y;
+  *__P = (unsigned long long) (__res >> 64);
+  return (unsigned long long) __res;
+}
+#else /* !__x86_64__ */
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
+{
+  unsigned long long __res = (unsigned long long) __X * __Y;
+  *__P = (unsigned int) (__res >> 32);
+  return (unsigned int) __res;
+}
 #endif /* !__x86_64__ */
 #endif /* __BMI2INTRIN_H */
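
The new _mulx_u64 above is a full 64x64 -> 128-bit multiply; a minimal caller (illustrative, assuming -mbmi2 on x86-64, with the header reached via <immintrin.h>):

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  unsigned long long hi;
  /* (2^64 - 1) * 2 == 2^65 - 2: high half 1, low half 0xff...fe. */
  unsigned long long lo = _mulx_u64(0xffffffffffffffffULL, 2, &hi);
  printf("hi=%llx lo=%llx\n", hi, lo);
  return 0;
}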

bmiintrin.h

@@ -33,7 +33,7 @@
 #define __BMIINTRIN_H
 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
-__tzcnt16(unsigned short __X)
+__tzcnt_u16(unsigned short __X)
 {
   return __builtin_ctzs(__X);
 }
@@ -69,7 +69,7 @@ __blsr_u32(unsigned int __X)
 }
 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
-__tzcnt32(unsigned int __X)
+__tzcnt_u32(unsigned int __X)
 {
   return __builtin_ctz(__X);
 }
@@ -106,7 +106,7 @@ __blsr_u64(unsigned long long __X)
 }
 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
-__tzcnt64(unsigned long long __X)
+__tzcnt_u64(unsigned long long __X)
 {
   return __builtin_ctzll(__X);
 }
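
The hunks above rename the trailing-zero-count helpers to their documented __tzcnt_u* spellings; a usage sketch (not from the commit), assuming -mbmi:

#include <x86intrin.h>
#include <stdio.h>

int main(void)
{
  /* 0x08 == 0b1000 has three trailing zero bits. */
  printf("%u\n", __tzcnt_u32(0x08u));
  return 0;
}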

cpuid.h

@@ -28,6 +28,6 @@
 static inline int __get_cpuid (unsigned int level, unsigned int *eax,
                                unsigned int *ebx, unsigned int *ecx,
                                unsigned int *edx) {
-    asm("cpuid" : "=a"(*eax), "=b" (*ebx), "=c"(*ecx), "=d"(*edx) : "0"(level));
+    __asm("cpuid" : "=a"(*eax), "=b" (*ebx), "=c"(*ecx), "=d"(*edx) : "0"(level));
     return 1;
 }
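
A small caller of __get_cpuid (illustrative only; note that this local copy always returns 1 rather than validating the requested level):

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
  unsigned eax, ebx, ecx, edx;
  char vendor[13];
  /* Leaf 0: max basic leaf in eax, vendor string in ebx/edx/ecx. */
  __get_cpuid(0, &eax, &ebx, &ecx, &edx);
  memcpy(vendor + 0, &ebx, 4);
  memcpy(vendor + 4, &edx, 4);
  memcpy(vendor + 8, &ecx, 4);
  vendor[12] = '\0';
  printf("max leaf %u, vendor %s\n", eax, vendor);
  return 0;
}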

emmintrin.h

@@ -1186,7 +1186,10 @@ _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
 _mm_storel_epi64(__m128i *p, __m128i a)
 {
-  __builtin_ia32_storelv4si((__v2si *)p, a);
+  struct __mm_storel_epi64_struct {
+    long long u;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __mm_storel_epi64_struct*)p)->u = a[0];
 }
 static __inline__ void __attribute__((__always_inline__, __nodebug__))

f16cintrin.h (new file)

@@ -0,0 +1,58 @@
/*===---- f16cintrin.h - F16C intrinsics ---------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
#error "Never use <f16cintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __F16C__
# error "F16C instruction is not enabled"
#endif /* __F16C__ */
#ifndef __F16CINTRIN_H
#define __F16CINTRIN_H
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
typedef float __m256 __attribute__ ((__vector_size__ (32)));
#define _mm_cvtps_ph(a, imm) __extension__ ({ \
__m128 __a = (a); \
(__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); })
#define _mm256_cvtps_ph(a, imm) __extension__ ({ \
__m256 __a = (a); \
(__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); })
static __inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cvtph_ps(__m128i a)
{
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)a);
}
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_cvtph_ps(__m128i a)
{
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)a);
}
#endif /* __F16CINTRIN_H */
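
A round-trip sketch for the F16C conversions above (not part of the commit); in this header set f16cintrin.h is reached through <x86intrin.h>, and the build needs -mf16c:

#include <x86intrin.h>
#include <stdio.h>

int main(void)
{
  /* Pack four floats to 16-bit halves (immediate 0 = round to
     nearest even) and widen them back. */
  __m128 f = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
  __m128i h = _mm_cvtps_ph(f, 0);
  __m128 f2 = _mm_cvtph_ps(h);
  float out[4];
  _mm_storeu_ps(out, f2);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}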

float.h

@@ -28,7 +28,7 @@
  *   additional definitions provided for Windows.
  *   For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx
  */
-#if defined(__MINGW32__) && \
+#if (defined(__MINGW32__) || defined(_MSC_VER)) && \
     defined(__has_include_next) && __has_include_next(<float.h>)
 #  include_next <float.h>

fmaintrin.h (new file)

@@ -0,0 +1,229 @@
/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __FMAINTRIN_H
#define __FMAINTRIN_H
#ifndef __FMA__
# error "FMA instruction set is not enabled"
#else
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
}
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
}
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
}
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
}
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
}
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
}
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
}
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
}
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
}
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
}
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
}
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
}
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
}
#endif /* __FMA__ */
#endif /* __FMAINTRIN_H */
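
A fused multiply-add caller (illustrative, assuming -mfma): each lane computes a*b + c with a single rounding step.

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  __m128 a = _mm_set1_ps(2.0f);
  __m128 b = _mm_set1_ps(3.0f);
  __m128 c = _mm_set1_ps(1.0f);
  /* 2*3 + 1 == 7 in every lane, fused into one instruction. */
  __m128 r = _mm_fmadd_ps(a, b, c);
  float out[4];
  _mm_storeu_ps(out, r);
  printf("%g\n", out[0]);
  return 0;
}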

immintrin.h

@@ -72,4 +72,34 @@
 #include <lzcntintrin.h>
 #endif
+#ifdef __FMA__
+#include <fmaintrin.h>
+#endif
+#ifdef __RDRND__
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_rdrand16_step(unsigned short *__p)
+{
+  return __builtin_ia32_rdrand16_step(__p);
+}
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_rdrand32_step(unsigned int *__p)
+{
+  return __builtin_ia32_rdrand32_step(__p);
+}
+#ifdef __x86_64__
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+_rdrand64_step(unsigned long long *__p)
+{
+  return __builtin_ia32_rdrand64_step(__p);
+}
+#endif
+#endif /* __RDRND__ */
+#ifdef __RTM__
+#include <rtmintrin.h>
+#endif
 #endif /* __IMMINTRIN_H */
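
The _rdrand*_step helpers added above return 0 when the hardware RNG is transiently out of entropy, so callers conventionally retry; a sketch (not from the commit), assuming -mrdrnd:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  unsigned int r;
  int tries;
  /* Bounded retry loop: each step reports success via its return value. */
  for (tries = 0; tries < 10; ++tries) {
    if (_rdrand32_step(&r)) {
      printf("%u\n", r);
      return 0;
    }
  }
  return 1;
}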

module.map

@@ -25,6 +25,11 @@ module _Builtin_intrinsics [system] {
       header "mmintrin.h"
     }
+    explicit module f16c {
+      requires f16c
+      header "f16cintrin.h"
+    }
     explicit module sse {
       requires sse
       export mmx
@@ -62,6 +67,12 @@ module _Builtin_intrinsics [system] {
       header "nmmintrin.h"
     }
+    explicit module sse4a {
+      requires sse4a
+      export sse3
+      header "ammintrin.h"
+    }
     explicit module avx {
       requires avx
       export sse4_2
@@ -84,6 +95,11 @@ module _Builtin_intrinsics [system] {
       header "bmi2intrin.h"
     }
+    explicit module fma {
+      requires fma
+      header "fmaintrin.h"
+    }
     explicit module fma4 {
       requires fma4
       export sse3
@@ -104,5 +120,26 @@ module _Builtin_intrinsics [system] {
       requires mm3dnow
       header "mm3dnow.h"
     }
+    explicit module xop {
+      requires xop
+      export fma4
+      header "xopintrin.h"
+    }
+    explicit module aes_pclmul {
+      requires aes, pclmul
+      header "wmmintrin.h"
+    }
+    explicit module aes {
+      requires aes
+      header "__wmmintrin_aes.h"
+    }
+    explicit module pclmul {
+      requires pclmul
+      header "__wmmintrin_pclmul.h"
+    }
   }
 }

rtmintrin.h (new file)

@@ -0,0 +1,49 @@
/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <rtmintrin.h> directly; include <immintrin.h> instead."
#endif
#define _XBEGIN_STARTED (~0u)
#define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1)
#define _XABORT_CONFLICT (1 << 2)
#define _XABORT_CAPACITY (1 << 3)
#define _XABORT_DEBUG (1 << 4)
#define _XABORT_NESTED (1 << 5)
#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_xbegin(void)
{
return __builtin_ia32_xbegin();
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_xend(void)
{
__builtin_ia32_xend();
}
#define _xabort(imm) __builtin_ia32_xabort((imm))
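
The canonical shape of an RTM transaction built from the intrinsics above (illustrative, assuming -mrtm); real code needs a non-transactional fallback because a transaction may always abort:

#include <immintrin.h>

static int counter;

void increment(void)
{
  unsigned int status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    ++counter;   /* transactional region */
    _xend();     /* commit */
  } else {
    /* Aborted; status carries the _XABORT_* flags. Fall back to an
       atomic (or a lock) instead of retrying forever. */
    __sync_fetch_and_add(&counter, 1);
  }
}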

stddef.h

@@ -43,10 +43,20 @@ typedef __WCHAR_TYPE__ wchar_t;
 #undef NULL
 #ifdef __cplusplus
-#undef __null  // VC++ hack.
-#define NULL __null
+#  if !defined(__MINGW32__) && !defined(_MSC_VER)
+#    define NULL __null
+#  else
+#    define NULL 0
+#  endif
 #else
-#define NULL ((void*)0)
+#  define NULL ((void*)0)
+#endif
+#ifdef __cplusplus
+#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
+namespace std { typedef decltype(nullptr) nullptr_t; }
+using ::std::nullptr_t;
+#endif
 #endif
 #define offsetof(t, d) __builtin_offsetof(t, d)

unwind.h

@@ -100,7 +100,7 @@ typedef enum {
   _UVRSR_FAILED = 2
 } _Unwind_VRS_Result;
-_Unwind_VRS_Result _Unwind_VRS_Get(_Unwind_Context *context,
+_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *context,
                                    _Unwind_VRS_RegClass regclass,
                                    uint32_t regno,
                                    _Unwind_VRS_DataRepresentation representation,

wmmintrin.h (new file)

@@ -0,0 +1,42 @@
/*===---- wmmintrin.h - AES intrinsics ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef _WMMINTRIN_H
#define _WMMINTRIN_H
#include <emmintrin.h>
#if !defined (__AES__) && !defined (__PCLMUL__)
# error "AES/PCLMUL instructions not enabled"
#else
#ifdef __AES__
#include <__wmmintrin_aes.h>
#endif /* __AES__ */
#ifdef __PCLMUL__
#include <__wmmintrin_pclmul.h>
#endif /* __PCLMUL__ */
#endif /* __AES__ || __PCLMUL__ */
#endif /* _WMMINTRIN_H */

x86intrin.h

@@ -46,10 +46,22 @@
 #include <popcntintrin.h>
 #endif
+#ifdef __SSE4A__
+#include <ammintrin.h>
+#endif
 #ifdef __FMA4__
 #include <fma4intrin.h>
 #endif
-// FIXME: SSE4A, XOP, LWP, ABM
+#ifdef __XOP__
+#include <xopintrin.h>
+#endif
+#ifdef __F16C__
+#include <f16cintrin.h>
+#endif
+// FIXME: LWP
 #endif /* __X86INTRIN_H */

xmmintrin.h

@@ -95,7 +95,8 @@ _mm_div_ps(__m128 a, __m128 b)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_sqrt_ss(__m128 a)
 {
-  return __builtin_ia32_sqrtss(a);
+  __m128 c = __builtin_ia32_sqrtss(a);
+  return (__m128) { c[0], a[1], a[2], a[3] };
 }
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@@ -107,7 +108,8 @@ _mm_sqrt_ps(__m128 a)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_rcp_ss(__m128 a)
 {
-  return __builtin_ia32_rcpss(a);
+  __m128 c = __builtin_ia32_rcpss(a);
+  return (__m128) { c[0], a[1], a[2], a[3] };
 }
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@@ -119,7 +121,8 @@ _mm_rcp_ps(__m128 a)
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_rsqrt_ss(__m128 a)
 {
-  return __builtin_ia32_rsqrtss(a);
+  __m128 c = __builtin_ia32_rsqrtss(a);
+  return (__m128) { c[0], a[1], a[2], a[3] };
 }
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
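
The point of the three hunks above: the scalar forms must write only lane 0 and pass lanes 1-3 through from the input, matching the SQRTSS/RCPSS/RSQRTSS semantics. A demonstration sketch (not part of the commit):

#include <xmmintrin.h>
#include <stdio.h>

int main(void)
{
  /* _mm_set_ps lists lanes high to low, so lane 0 is 9.0f. */
  __m128 a = _mm_set_ps(8.0f, 7.0f, 6.0f, 9.0f);
  float out[4];
  _mm_storeu_ps(out, _mm_sqrt_ss(a));
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 3 6 7 8 */
  return 0;
}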

xopintrin.h (new file)

@@ -0,0 +1,411 @@
/*===---- xopintrin.h - FMA4 intrinsics ------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __X86INTRIN_H
#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __XOPINTRIN_H
#define __XOPINTRIN_H
#ifndef __XOP__
# error "XOP instruction set is not enabled"
#else
#include <fma4intrin.h>
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddw_epi8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddd_epi8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epi8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddd_epi16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epi16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epi32(__m128i __A)
{
return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddw_epu8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddd_epu8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epu8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddd_epu16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epu16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_haddq_epu32(__m128i __A)
{
return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubw_epi8(__m128i __A)
{
return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubd_epi16(__m128i __A)
{
return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubq_epi32(__m128i __A)
{
return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
}
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_rot_epi8(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_rot_epi16(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_rot_epi32(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_rot_epi64(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
}
#define _mm_roti_epi8(A, N) __extension__ ({ \
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); })
#define _mm_roti_epi16(A, N) __extension__ ({ \
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); })
#define _mm_roti_epi32(A, N) __extension__ ({ \
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); })
#define _mm_roti_epi64(A, N) __extension__ ({ \
__m128i __A = (A); \
(__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); })
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shl_epi8(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shl_epi16(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shl_epi32(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shl_epi64(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha_epi8(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha_epi16(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha_epi32(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha_epi64(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
}
#define _mm_com_epu8(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); })
#define _mm_com_epu16(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); })
#define _mm_com_epu32(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); })
#define _mm_com_epu64(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); })
#define _mm_com_epi8(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); })
#define _mm_com_epi16(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); })
#define _mm_com_epi32(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); })
#define _mm_com_epi64(A, B, N) __extension__ ({ \
__m128i __A = (A); \
__m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); })
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
__m128d __X = (X); \
__m128d __Y = (Y); \
__m128i __C = (C); \
(__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \
(__v2di)__C, (I)); })
#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
__m256d __X = (X); \
__m256d __Y = (Y); \
__m256i __C = (C); \
(__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \
(__v4di)__C, (I)); })
#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
__m128 __X = (X); \
__m128 __Y = (Y); \
__m128i __C = (C); \
(__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \
(__v4si)__C, (I)); })
#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
__m256 __X = (X); \
__m256 __Y = (Y); \
__m256i __C = (C); \
(__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \
(__v8si)__C, (I)); })
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_frcz_ss(__m128 __A)
{
return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_frcz_sd(__m128d __A)
{
return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_frcz_ps(__m128 __A)
{
return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
}
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_frcz_pd(__m128d __A)
{
return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
}
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_frcz_ps(__m256 __A)
{
return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
}
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_frcz_pd(__m256d __A)
{
return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
}
#endif /* __XOP__ */
#endif /* __XOPINTRIN_H */
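
One of the XOP additions in action (illustrative, assuming -mxop): _mm_rot_epi32 rotates each 32-bit lane left by the count in the matching lane of the second operand.

#include <x86intrin.h>
#include <stdio.h>

int main(void)
{
  __m128i v = _mm_set1_epi32(1);
  __m128i counts = _mm_set_epi32(0, 1, 4, 8); /* lane 0 gets count 8 */
  int out[4];
  _mm_storeu_si128((__m128i *)out, _mm_rot_epi32(v, counts));
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 256 16 2 1 */
  return 0;
}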