From f30857df22efeadbc05f8a9e0a8960fd796e761a Mon Sep 17 00:00:00 2001 From: Strahinja Val Markovic Date: Sat, 11 Jan 2014 12:26:29 -0800 Subject: [PATCH] Updating to clang 3.4 builtin headers --- python/clang_includes/Intrin.h | 784 +++++++++++++++++++++++++++ python/clang_includes/avx2intrin.h | 25 +- python/clang_includes/avxintrin.h | 22 +- python/clang_includes/cpuid.h | 126 ++++- python/clang_includes/emmintrin.h | 36 +- python/clang_includes/f16cintrin.h | 4 +- python/clang_includes/immintrin.h | 4 + python/clang_includes/limits.h | 6 +- python/clang_includes/module.map | 11 + python/clang_includes/prfchwintrin.h | 5 + python/clang_includes/rdseedintrin.h | 4 + python/clang_includes/rtmintrin.h | 5 + python/clang_includes/shaintrin.h | 74 +++ python/clang_includes/smmintrin.h | 15 +- python/clang_includes/tbmintrin.h | 158 ++++++ python/clang_includes/tgmath.h | 6 +- python/clang_includes/unwind.h | 161 +++++- python/clang_includes/x86intrin.h | 4 + python/clang_includes/xmmintrin.h | 18 +- python/clang_includes/xopintrin.h | 393 ++++++++++++++ 20 files changed, 1806 insertions(+), 55 deletions(-) create mode 100644 python/clang_includes/Intrin.h create mode 100644 python/clang_includes/shaintrin.h create mode 100644 python/clang_includes/tbmintrin.h diff --git a/python/clang_includes/Intrin.h b/python/clang_includes/Intrin.h new file mode 100644 index 00000000..43764647 --- /dev/null +++ b/python/clang_includes/Intrin.h @@ -0,0 +1,784 @@ +/* ===-------- Intrin.h ---------------------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +/* Only include this if we're compiling for the windows platform. */ +#ifndef _MSC_VER +#include_next +#else + +#ifndef __INTRIN_H +#define __INTRIN_H + +/* First include the standard intrinsics. */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* And the random ones that aren't in those files. */ +__m64 _m_from_float(float); +__m64 _m_from_int(int _l); +void _m_prefetch(void *); +float _m_to_float(__m64); +int _m_to_int(__m64 _M); + +/* Other assorted instruction intrinsics. 
*/ +void __addfsbyte(unsigned long, unsigned char); +void __addfsdword(unsigned long, unsigned long); +void __addfsword(unsigned long, unsigned short); +void __code_seg(const char *); +void __cpuid(int[4], int); +void __cpuidex(int[4], int, int); +void __debugbreak(void); +__int64 __emul(int, int); +unsigned __int64 __emulu(unsigned int, unsigned int); +void __cdecl __fastfail(unsigned int); +unsigned int __getcallerseflags(void); +void __halt(void); +unsigned char __inbyte(unsigned short); +void __inbytestring(unsigned short, unsigned char *, unsigned long); +void __incfsbyte(unsigned long); +void __incfsdword(unsigned long); +void __incfsword(unsigned long); +unsigned long __indword(unsigned short); +void __indwordstring(unsigned short, unsigned long *, unsigned long); +void __int2c(void); +void __invlpg(void *); +unsigned short __inword(unsigned short); +void __inwordstring(unsigned short, unsigned short *, unsigned long); +void __lidt(void *); +unsigned __int64 __ll_lshift(unsigned __int64, int); +__int64 __ll_rshift(__int64, int); +void __llwpcb(void *); +unsigned char __lwpins32(unsigned int, unsigned int, unsigned int); +void __lwpval32(unsigned int, unsigned int, unsigned int); +unsigned int __lzcnt(unsigned int); +unsigned short __lzcnt16(unsigned short); +void __movsb(unsigned char *, unsigned char const *, size_t); +void __movsd(unsigned long *, unsigned long const *, size_t); +void __movsw(unsigned short *, unsigned short const *, size_t); +void __nop(void); +void __nvreg_restore_fence(void); +void __nvreg_save_fence(void); +void __outbyte(unsigned short, unsigned char); +void __outbytestring(unsigned short, unsigned char *, unsigned long); +void __outdword(unsigned short, unsigned long); +void __outdwordstring(unsigned short, unsigned long *, unsigned long); +void __outword(unsigned short, unsigned short); +void __outwordstring(unsigned short, unsigned short *, unsigned long); +static __inline__ +unsigned int __popcnt(unsigned int); +static __inline__ +unsigned short __popcnt16(unsigned short); +unsigned __int64 __rdtsc(void); +unsigned __int64 __rdtscp(unsigned int *); +unsigned long __readcr0(void); +unsigned long __readcr2(void); +unsigned long __readcr3(void); +unsigned long __readcr5(void); +unsigned long __readcr8(void); +unsigned int __readdr(unsigned int); +unsigned int __readeflags(void); +unsigned char __readfsbyte(unsigned long); +unsigned long __readfsdword(unsigned long); +unsigned __int64 __readfsqword(unsigned long); +unsigned short __readfsword(unsigned long); +unsigned __int64 __readmsr(unsigned long); +unsigned __int64 __readpmc(unsigned long); +unsigned long __segmentlimit(unsigned long); +void __sidt(void *); +void *__slwpcb(void); +void __stosb(unsigned char *, unsigned char, size_t); +void __stosd(unsigned long *, unsigned long, size_t); +void __stosw(unsigned short *, unsigned short, size_t); +void __svm_clgi(void); +void __svm_invlpga(void *, int); +void __svm_skinit(int); +void __svm_stgi(void); +void __svm_vmload(size_t); +void __svm_vmrun(size_t); +void __svm_vmsave(size_t); +void __ud2(void); +unsigned __int64 __ull_rshift(unsigned __int64, int); +void __vmx_off(void); +void __vmx_vmptrst(unsigned __int64 *); +void __wbinvd(void); +void __writecr0(unsigned int); +void __writecr3(unsigned int); +void __writecr4(unsigned int); +void __writecr8(unsigned int); +void __writedr(unsigned int, unsigned int); +void __writeeflags(unsigned int); +void __writefsbyte(unsigned long, unsigned char); +void __writefsdword(unsigned long, unsigned long); +void 
__writefsqword(unsigned long, unsigned __int64); +void __writefsword(unsigned long, unsigned short); +void __writemsr(unsigned long, unsigned __int64); +static __inline__ +void *_AddressOfReturnAddress(void); +unsigned int _andn_u32(unsigned int, unsigned int); +unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); +unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); +unsigned int _bextri_u32(unsigned int, unsigned int); +static __inline__ +unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); +static __inline__ +unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); +static __inline__ +unsigned char _bittest(long const *, long); +static __inline__ +unsigned char _bittestandcomplement(long *, long); +static __inline__ +unsigned char _bittestandreset(long *, long); +static __inline__ +unsigned char _bittestandset(long *, long); +unsigned int _blcfill_u32(unsigned int); +unsigned int _blci_u32(unsigned int); +unsigned int _blcic_u32(unsigned int); +unsigned int _blcmsk_u32(unsigned int); +unsigned int _blcs_u32(unsigned int); +unsigned int _blsfill_u32(unsigned int); +unsigned int _blsi_u32(unsigned int); +unsigned int _blsic_u32(unsigned int); +unsigned int _blsmsk_u32(unsigned int); +unsigned int _blsmsk_u32(unsigned int); +unsigned int _blsr_u32(unsigned int); +unsigned int _blsr_u32(unsigned int); +unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); +unsigned long __cdecl _byteswap_ulong(unsigned long); +unsigned short __cdecl _byteswap_ushort(unsigned short); +unsigned _bzhi_u32(unsigned int, unsigned int); +void __cdecl _disable(void); +void __cdecl _enable(void); +void __cdecl _fxrstor(void const *); +void __cdecl _fxsave(void *); +long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); +static __inline__ +long _InterlockedAnd(long volatile *_Value, long _Mask); +static __inline__ +short _InterlockedAnd16(short volatile *_Value, short _Mask); +static __inline__ +char _InterlockedAnd8(char volatile *_Value, char _Mask); +unsigned char _interlockedbittestandreset(long volatile *, long); +unsigned char _interlockedbittestandset(long volatile *, long); +static __inline__ +long __cdecl _InterlockedCompareExchange(long volatile *_Destination, + long _Exchange, long _Comparand); +long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long); +long _InterlockedCompareExchange_HLERelease(long volatile *, long, long); +static __inline__ +short _InterlockedCompareExchange16(short volatile *_Destination, + short _Exchange, short _Comparand); +static __inline__ +__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand); +__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64, + __int64); +__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64, + __int64); +static __inline__ +char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange, + char _Comparand); +void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *, + void *); +void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *, + void *); +static __inline__ +long __cdecl _InterlockedDecrement(long volatile *_Addend); +static __inline__ +short _InterlockedDecrement16(short volatile *_Addend); +static __inline__ +long __cdecl _InterlockedExchange(long volatile *_Target, long _Value); +static __inline__ +short _InterlockedExchange16(short volatile *_Target, short _Value); +static 
__inline__ +char _InterlockedExchange8(char volatile *_Target, char _Value); +static __inline__ +long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value); +long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); +long _InterlockedExchangeAdd_HLERelease(long volatile *, long); +static __inline__ +char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value); +static __inline__ +long __cdecl _InterlockedIncrement(long volatile *_Addend); +static __inline__ +short _InterlockedIncrement16(short volatile *_Addend); +static __inline__ +long _InterlockedOr(long volatile *_Value, long _Mask); +static __inline__ +short _InterlockedOr16(short volatile *_Value, short _Mask); +static __inline__ +char _InterlockedOr8(char volatile *_Value, char _Mask); +static __inline__ +long _InterlockedXor(long volatile *_Value, long _Mask); +static __inline__ +short _InterlockedXor16(short volatile *_Value, short _Mask); +static __inline__ +char _InterlockedXor8(char volatile *_Value, char _Mask); +void __cdecl _invpcid(unsigned int, void *); +static __inline__ +unsigned long __cdecl _lrotl(unsigned long, int); +static __inline__ +unsigned long __cdecl _lrotr(unsigned long, int); +static __inline__ +unsigned int _lzcnt_u32(unsigned int); +static __inline__ +void _ReadBarrier(void); +static __inline__ +void _ReadWriteBarrier(void); +static __inline__ +void *_ReturnAddress(void); +unsigned int _rorx_u32(unsigned int, const unsigned int); +int __cdecl _rdrand16_step(unsigned short *); +int __cdecl _rdrand32_step(unsigned int *); +static __inline__ +unsigned int __cdecl _rotl(unsigned int _Value, int _Shift); +static __inline__ +unsigned short _rotl16(unsigned short _Value, unsigned char _Shift); +static __inline__ +unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift); +static __inline__ +unsigned char _rotl8(unsigned char _Value, unsigned char _Shift); +static __inline__ +unsigned int __cdecl _rotr(unsigned int _Value, int _Shift); +static __inline__ +unsigned short _rotr16(unsigned short _Value, unsigned char _Shift); +static __inline__ +unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift); +static __inline__ +unsigned char _rotr8(unsigned char _Value, unsigned char _Shift); +int _sarx_i32(int, unsigned int); + +/* FIXME: Need definition for jmp_buf. + int __cdecl _setjmp(jmp_buf); */ + +unsigned int _shlx_u32(unsigned int, unsigned int); +unsigned int _shrx_u32(unsigned int, unsigned int); +void _Store_HLERelease(long volatile *, long); +void _Store64_HLERelease(__int64 volatile *, __int64); +void _StorePointer_HLERelease(void *volatile *, void *); +unsigned int _t1mskc_u32(unsigned int); +unsigned int _tzcnt_u32(unsigned int); +unsigned int _tzcnt_u32(unsigned int); +unsigned int _tzmsk_u32(unsigned int); +static __inline__ +void _WriteBarrier(void); +void _xabort(const unsigned int imm); +unsigned __int32 xbegin(void); +void _xend(void); +unsigned __int64 __cdecl _xgetbv(unsigned int); +void __cdecl _xrstor(void const *, unsigned __int64); +void __cdecl _xsave(void *, unsigned __int64); +void __cdecl _xsaveopt(void *, unsigned __int64); +void __cdecl _xsetbv(unsigned int, unsigned __int64); +unsigned char _xtest(void); + +/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. 
*/ +#ifdef __x86_64__ +void __addgsbyte(unsigned long, unsigned char); +void __addgsdword(unsigned long, unsigned long); +void __addgsqword(unsigned long, unsigned __int64); +void __addgsword(unsigned long, unsigned short); +void __faststorefence(void); +void __incgsbyte(unsigned long); +void __incgsdword(unsigned long); +void __incgsqword(unsigned long); +void __incgsword(unsigned long); +unsigned __int64 __popcnt64(unsigned __int64); +unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, + unsigned __int64 _HighPart, + unsigned char _Shift); +unsigned __int64 __shiftright128(unsigned __int64 _LowPart, + unsigned __int64 _HighPart, + unsigned char _Shift); +void __stosq(unsigned __int64 *, unsigned __int64, size_t); +unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64); +unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int); +unsigned __int64 _bextri_u64(unsigned __int64, unsigned int); +static __inline__ +unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); +static __inline__ +unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); +static __inline__ +unsigned char _bittest64(__int64 const *, __int64); +static __inline__ +unsigned char _bittestandcomplement64(__int64 *, __int64); +static __inline__ +unsigned char _bittestandreset64(__int64 *, __int64); +static __inline__ +unsigned char _bittestandset64(__int64 *, __int64); +unsigned __int64 _blcfill_u64(unsigned __int64); +unsigned __int64 _blci_u64(unsigned __int64); +unsigned __int64 _blcic_u64(unsigned __int64); +unsigned __int64 _blcmsk_u64(unsigned __int64); +unsigned __int64 _blcs_u64(unsigned __int64); +unsigned __int64 _blsfill_u64(unsigned __int64); +unsigned __int64 _blsi_u64(unsigned __int64); +unsigned __int64 _blsic_u64(unsigned __int64); +unsigned __int64 _blmsk_u64(unsigned __int64); +unsigned __int64 _blsr_u64(unsigned __int64); +unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); +unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int); +void __cdecl _fxrstor64(void const *); +void __cdecl _fxsave64(void *); +long _InterlockedAnd_np(long volatile *_Value, long _Mask); +short _InterlockedAnd16_np(short volatile *_Value, short _Mask); +__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask); +char _InterlockedAnd8_np(char volatile *_Value, char _Mask); +unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); +unsigned char _interlockedbittestandset64(__int64 volatile *, __int64); +long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, + long _Comparand); +unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, + __int64 _ExchangeHigh, + __int64 _ExchangeLow, + __int64 *_CompareandResult); +unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, + __int64 _ExchangeHigh, + __int64 _ExchangeLow, + __int64 *_ComparandResult); +short _InterlockedCompareExchange16_np(short volatile *_Destination, + short _Exchange, short _Comparand); +__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand); +void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination, + void *_Exchange, void *_Comparand); +long _InterlockedOr_np(long volatile *_Value, long _Mask); +short _InterlockedOr16_np(short volatile *_Value, short _Mask); +__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask); +char _InterlockedOr8_np(char volatile *_Value, char _Mask); +long 
_InterlockedXor_np(long volatile *_Value, long _Mask); +short _InterlockedXor16_np(short volatile *_Value, short _Mask); +__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask); +char _InterlockedXor8_np(char volatile *_Value, char _Mask); +unsigned __int64 _lzcnt_u64(unsigned __int64); +__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand, + __int64 *_HighProduct); +unsigned int __cdecl _readfsbase_u32(void); +unsigned __int64 __cdecl _readfsbase_u64(void); +unsigned int __cdecl _readgsbase_u32(void); +unsigned __int64 __cdecl _readgsbase_u64(void); +unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); +unsigned __int64 _tzcnt_u64(unsigned __int64); +unsigned __int64 _tzmsk_u64(unsigned __int64); +unsigned __int64 _umul128(unsigned __int64 _Multiplier, + unsigned __int64 _Multiplicand, + unsigned __int64 *_HighProduct); +void __cdecl _writefsbase_u32(unsigned int); +void _cdecl _writefsbase_u64(unsigned __int64); +void __cdecl _writegsbase_u32(unsigned int); +void __cdecl _writegsbase_u64(unsigned __int64); +void __cdecl _xrstor64(void const *, unsigned __int64); +void __cdecl _xsave64(void *, unsigned __int64); +void __cdecl _xsaveopt64(void *, unsigned __int64); + +#endif /* __x86_64__ */ + +/*----------------------------------------------------------------------------*\ +|* Bit Twiddling +\*----------------------------------------------------------------------------*/ +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_rotl8(unsigned char _Value, unsigned char _Shift) { + _Shift &= 0x7; + return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_rotr8(unsigned char _Value, unsigned char _Shift) { + _Shift &= 0x7; + return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value; +} +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +_rotl16(unsigned short _Value, unsigned char _Shift) { + _Shift &= 0xf; + return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value; +} +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +_rotr16(unsigned short _Value, unsigned char _Shift) { + _Shift &= 0xf; + return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value; +} +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_rotl(unsigned int _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; +} +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_rotr(unsigned int _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; +} +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) +_lrotl(unsigned long _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; +} +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) +_lrotr(unsigned long _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; +} +static +__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) +_rotl64(unsigned __int64 _Value, int _Shift) { + _Shift &= 0x3f; + return _Shift ? 
(_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value; +} +static +__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) +_rotr64(unsigned __int64 _Value, int _Shift) { + _Shift &= 0x3f; + return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value; +} +/*----------------------------------------------------------------------------*\ +|* Bit Counting and Testing +\*----------------------------------------------------------------------------*/ +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_BitScanForward(unsigned long *_Index, unsigned long _Mask) { + if (!_Mask) + return 0; + *_Index = __builtin_ctzl(_Mask); + return 1; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_BitScanReverse(unsigned long *_Index, unsigned long _Mask) { + if (!_Mask) + return 0; + *_Index = 31 - __builtin_clzl(_Mask); + return 1; +} +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_lzcnt_u32(unsigned int a) { + if (!a) + return 32; + return __builtin_clzl(a); +} +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +__popcnt16(unsigned short value) { + return __builtin_popcount((int)value); +} +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__popcnt(unsigned int value) { + return __builtin_popcount(value); +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_bittest(long const *a, long b) { + return (*a >> b) & 1; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_bittestandcomplement(long *a, long b) { + unsigned char x = (*a >> b) & 1; + *a = *a ^ (1 << b); + return x; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_bittestandreset(long *a, long b) { + unsigned char x = (*a >> b) & 1; + *a = *a & ~(1 << b); + return x; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_bittestandset(long *a, long b) { + unsigned char x = (*a >> b) & 1; + *a = *a | (1 << b); + return x; +} +#ifdef __x86_64__ +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) { + if (!_Mask) + return 0; + *_Index = __builtin_ctzll(_Mask); + return 1; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) { + if (!_Mask) + return 0; + *_Index = 63 - __builtin_clzll(_Mask); + return 1; +} +static +__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) +_lzcnt_u64(unsigned __int64 a) { + if (!a) + return 64; + return __builtin_clzll(a); +} +static __inline__ +unsigned __int64 __attribute__((__always_inline__, __nodebug__)) + __popcnt64(unsigned __int64 value) { + return __builtin_popcountll(value); +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_bittest64(__int64 const *a, __int64 b) { + return (*a >> b) & 1; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_bittestandcomplement64(__int64 *a, __int64 b) { + unsigned char x = (*a >> b) & 1; + *a = *a ^ (1ll << b); + return x; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_bittestandreset64(__int64 *a, __int64 b) { + unsigned char x = (*a >> b) & 1; + *a = *a & ~(1ll << b); + return x; +} +static __inline__ unsigned char 
__attribute__((__always_inline__, __nodebug__)) +_bittestandset64(__int64 *a, __int64 b) { + unsigned char x = (*a >> b) & 1; + *a = *a | (1ll << b); + return x; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Exchange Add +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) { + return __atomic_add_fetch(_Addend, _Value, 0) - _Value; +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) { + return __atomic_add_fetch(_Addend, _Value, 0) - _Value; +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchangeAdd(long volatile *_Addend, long _Value) { + return __atomic_add_fetch(_Addend, _Value, 0) - _Value; +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) { + return __atomic_add_fetch(_Addend, _Value, 0) - _Value; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Exchange Sub +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchangeSub8(char volatile *_Subend, char _Value) { + return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchangeSub16(short volatile *_Subend, short _Value) { + return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchangeSub(long volatile *_Subend, long _Value) { + return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) { + return __atomic_sub_fetch(_Subend, _Value, 0) + _Value; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Increment +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedIncrement16(char volatile *_Value) { + return __atomic_add_fetch(_Value, 1, 0); +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedIncrement(long volatile *_Value) { + return __atomic_add_fetch(_Value, 1, 0); +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedIncrement64(__int64 volatile *_Value) { + return __atomic_add_fetch(_Value, 1, 0); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Decrement +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedDecrement16(char volatile *_Value) { + return __atomic_sub_fetch(_Value, 1, 0); +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedDecrement(long volatile *_Value) { + return __atomic_sub_fetch(_Value, 1, 0); +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, 
__nodebug__)) +_InterlockedDecrement64(__int64 volatile *_Value) { + return __atomic_sub_fetch(_Value, 1, 0); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked And +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedAnd8(char volatile *_Value, char _Mask) { + return __atomic_and_fetch(_Value, _Mask, 0); +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedAnd16(short volatile *_Value, short _Mask) { + return __atomic_and_fetch(_Value, _Mask, 0); +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedAnd(long volatile *_Value, long _Mask) { + return __atomic_and_fetch(_Value, _Mask, 0); +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { + return __atomic_and_fetch(_Value, _Mask, 0); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Or +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedOr8(char volatile *_Value, char _Mask) { + return __atomic_or_fetch(_Value, _Mask, 0); +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedOr16(short volatile *_Value, short _Mask) { + return __atomic_or_fetch(_Value, _Mask, 0); +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedOr(long volatile *_Value, long _Mask) { + return __atomic_or_fetch(_Value, _Mask, 0); +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { + return __atomic_or_fetch(_Value, _Mask, 0); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Xor +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedXor8(char volatile *_Value, char _Mask) { + return __atomic_xor_fetch(_Value, _Mask, 0); +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedXor16(short volatile *_Value, short _Mask) { + return __atomic_xor_fetch(_Value, _Mask, 0); +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedXor(long volatile *_Value, long _Mask) { + return __atomic_xor_fetch(_Value, _Mask, 0); +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { + return __atomic_xor_fetch(_Value, _Mask, 0); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Exchange +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchange8(char volatile *_Target, char _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 0); + return _Value; +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchange16(short volatile *_Target, short _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 0); + return _Value; +} +static __inline__ long 
__attribute__((__always_inline__, __nodebug__)) +_InterlockedExchange(long volatile *_Target, long _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 0); + return _Value; +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 0); + return _Value; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Compare Exchange +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedCompareExchange8(char volatile *_Destination, + char _Exchange, char _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + return _Comparand; +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedCompareExchange16(short volatile *_Destination, + short _Exchange, short _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + return _Comparand; +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedCompareExchange(long volatile *_Destination, + long _Exchange, long _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + return _Comparand; +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedCompareExchange64(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + return _Comparand; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Barriers +\*----------------------------------------------------------------------------*/ +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +_ReadWriteBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +_ReadBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +_WriteBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +/*----------------------------------------------------------------------------*\ +|* Misc +\*----------------------------------------------------------------------------*/ +static __inline__ void * __attribute__((__always_inline__, __nodebug__)) +_AddressOfReturnAddress(void) { + return (void*)((char*)__builtin_frame_address(0) + sizeof(void*)); +} +static __inline__ void * __attribute__((__always_inline__, __nodebug__)) +_ReturnAddress(void) { + return __builtin_return_address(0); +} + +#ifdef __cplusplus +} +#endif + +#endif /* __INTRIN_H */ +#endif /* _MSC_VER */ diff --git a/python/clang_includes/avx2intrin.h b/python/clang_includes/avx2intrin.h index 63b1efc1..95744693 100644 --- a/python/clang_includes/avx2intrin.h +++ b/python/clang_includes/avx2intrin.h @@ -25,6 +25,9 @@ #error "Never use directly; include instead." #endif +#ifndef __AVX2INTRIN_H +#define __AVX2INTRIN_H + /* SSE4 Multiple Packed Sums of Absolute Difference. 
*/ #define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) @@ -750,9 +753,9 @@ _mm256_broadcastsd_pd(__m128d __X) } static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) -_mm_broadcastsi128_si256(__m128i const *__a) +_mm256_broadcastsi128_si256(__m128i __X) { - return (__m256i)__builtin_ia32_vbroadcastsi256(__a); + return (__m256i)__builtin_ia32_vbroadcastsi256(__X); } #define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ @@ -1058,7 +1061,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) #define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ __m128i __a = (a); \ - int const *__m = (m); \ + long long const *__m = (m); \ __m128i __i = (i); \ __m128i __mask = (mask); \ (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \ @@ -1066,7 +1069,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) #define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ __m256i __a = (a); \ - int const *__m = (m); \ + long long const *__m = (m); \ __m128i __i = (i); \ __m256i __mask = (mask); \ (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \ @@ -1074,7 +1077,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) #define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ __m128i __a = (a); \ - int const *__m = (m); \ + long long const *__m = (m); \ __m128i __i = (i); \ __m128i __mask = (mask); \ (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \ @@ -1082,7 +1085,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) #define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ __m256i __a = (a); \ - int const *__m = (m); \ + long long const *__m = (m); \ __m256i __i = (i); \ __m256i __mask = (mask); \ (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \ @@ -1173,29 +1176,31 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y) (__v4si)_mm_set1_epi32(-1), (s)); }) #define _mm_i32gather_epi64(m, i, s) __extension__ ({ \ - int const *__m = (m); \ + long long const *__m = (m); \ __m128i __i = (i); \ (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \ (const __v2di *)__m, (__v4si)__i, \ (__v2di)_mm_set1_epi64x(-1), (s)); }) #define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \ - int const *__m = (m); \ + long long const *__m = (m); \ __m128i __i = (i); \ (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \ (const __v4di *)__m, (__v4si)__i, \ (__v4di)_mm256_set1_epi64x(-1), (s)); }) #define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ - int const *__m = (m); \ + long long const *__m = (m); \ __m128i __i = (i); \ (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \ (const __v2di *)__m, (__v2di)__i, \ (__v2di)_mm_set1_epi64x(-1), (s)); }) #define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ - int const *__m = (m); \ + long long const *__m = (m); \ __m256i __i = (i); \ (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \ (const __v4di *)__m, (__v4di)__i, \ (__v4di)_mm256_set1_epi64x(-1), (s)); }) + +#endif /* __AVX2INTRIN_H */ diff --git a/python/clang_includes/avxintrin.h b/python/clang_includes/avxintrin.h index 0683a65f..141c4d99 100644 --- a/python/clang_includes/avxintrin.h +++ b/python/clang_includes/avxintrin.h @@ -25,6 +25,9 @@ #error "Never use directly; include instead." 
#endif +#ifndef __AVXINTRIN_H +#define __AVXINTRIN_H + typedef double __v4df __attribute__ ((__vector_size__ (32))); typedef float __v8sf __attribute__ ((__vector_size__ (32))); typedef long long __v4di __attribute__ ((__vector_size__ (32))); @@ -432,21 +435,21 @@ static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi32(__m256i __a, int const __imm) { __v8si __b = (__v8si)__a; - return __b[__imm]; + return __b[__imm & 7]; } static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi16(__m256i __a, int const __imm) { __v16hi __b = (__v16hi)__a; - return __b[__imm]; + return __b[__imm & 15]; } static __inline int __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi8(__m256i __a, int const __imm) { __v32qi __b = (__v32qi)__a; - return __b[__imm]; + return __b[__imm & 31]; } #ifdef __x86_64__ @@ -454,7 +457,7 @@ static __inline long long __attribute__((__always_inline__, __nodebug__)) _mm256_extract_epi64(__m256i __a, const int __imm) { __v4di __b = (__v4di)__a; - return __b[__imm]; + return __b[__imm & 3]; } #endif @@ -1134,22 +1137,19 @@ _mm256_castsi256_si128(__m256i __a) static __inline __m256d __attribute__((__always_inline__, __nodebug__)) _mm256_castpd128_pd256(__m128d __a) { - __m128d __zero = _mm_setzero_pd(); - return __builtin_shufflevector(__a, __zero, 0, 1, 2, 2); + return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); } static __inline __m256 __attribute__((__always_inline__, __nodebug__)) _mm256_castps128_ps256(__m128 __a) { - __m128 __zero = _mm_setzero_ps(); - return __builtin_shufflevector(__a, __zero, 0, 1, 2, 3, 4, 4, 4, 4); + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); } static __inline __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_castsi128_si256(__m128i __a) { - __m128i __zero = _mm_setzero_si128(); - return __builtin_shufflevector(__a, __zero, 0, 1, 2, 2); + return __builtin_shufflevector(__a, __a, 0, 1, -1, -1); } /* SIMD load ops (unaligned) */ @@ -1220,3 +1220,5 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) __v128 = _mm256_extractf128_si256(__a, 1); __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128); } + +#endif /* __AVXINTRIN_H */ diff --git a/python/clang_includes/cpuid.h b/python/clang_includes/cpuid.h index 7b012384..8f12caeb 100644 --- a/python/clang_includes/cpuid.h +++ b/python/clang_includes/cpuid.h @@ -25,10 +25,132 @@ #error this header is for x86 only #endif +/* Features in %ecx for level 1 */ +#define bit_SSE3 0x00000001 +#define bit_PCLMULQDQ 0x00000002 +#define bit_DTES64 0x00000004 +#define bit_MONITOR 0x00000008 +#define bit_DSCPL 0x00000010 +#define bit_VMX 0x00000020 +#define bit_SMX 0x00000040 +#define bit_EIST 0x00000080 +#define bit_TM2 0x00000100 +#define bit_SSSE3 0x00000200 +#define bit_CNXTID 0x00000400 +#define bit_FMA 0x00001000 +#define bit_CMPXCHG16B 0x00002000 +#define bit_xTPR 0x00004000 +#define bit_PDCM 0x00008000 +#define bit_PCID 0x00020000 +#define bit_DCA 0x00040000 +#define bit_SSE41 0x00080000 +#define bit_SSE42 0x00100000 +#define bit_x2APIC 0x00200000 +#define bit_MOVBE 0x00400000 +#define bit_POPCNT 0x00800000 +#define bit_TSCDeadline 0x01000000 +#define bit_AESNI 0x02000000 +#define bit_XSAVE 0x04000000 +#define bit_OSXSAVE 0x08000000 +#define bit_AVX 0x10000000 +#define bit_RDRAND 0x40000000 + +/* Features in %edx for level 1 */ +#define bit_FPU 0x00000001 +#define bit_VME 0x00000002 +#define bit_DE 0x00000004 +#define bit_PSE 0x00000008 +#define bit_TSC 0x00000010 
+#define bit_MSR 0x00000020 +#define bit_PAE 0x00000040 +#define bit_MCE 0x00000080 +#define bit_CX8 0x00000100 +#define bit_APIC 0x00000200 +#define bit_SEP 0x00000800 +#define bit_MTRR 0x00001000 +#define bit_PGE 0x00002000 +#define bit_MCA 0x00004000 +#define bit_CMOV 0x00008000 +#define bit_PAT 0x00010000 +#define bit_PSE36 0x00020000 +#define bit_PSN 0x00040000 +#define bit_CLFSH 0x00080000 +#define bit_DS 0x00200000 +#define bit_ACPI 0x00400000 +#define bit_MMX 0x00800000 +#define bit_FXSR 0x01000000 +#define bit_SSE 0x02000000 +#define bit_SSE2 0x04000000 +#define bit_SS 0x08000000 +#define bit_HTT 0x10000000 +#define bit_TM 0x20000000 +#define bit_PBE 0x80000000 + +/* Features in %ebx for level 7 sub-leaf 0 */ +#define bit_FSGSBASE 0x00000001 +#define bit_SMEP 0x00000080 +#define bit_ENH_MOVSB 0x00000200 + +/* PIC on i386 uses %ebx, so preserve it. */ +#if __i386__ +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm(" pushl %%ebx\n" \ + " cpuid\n" \ + " mov %%ebx,%1\n" \ + " popl %%ebx" \ + : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm(" pushl %%ebx\n" \ + " cpuid\n" \ + " mov %%ebx,%1\n" \ + " popl %%ebx" \ + : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#else +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#endif + static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, unsigned int *__ebx, unsigned int *__ecx, unsigned int *__edx) { - __asm("cpuid" : "=a"(*__eax), "=b" (*__ebx), "=c"(*__ecx), "=d"(*__edx) - : "0"(__level)); + __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx); return 1; } + +static __inline int __get_cpuid_max (unsigned int __level, unsigned int *__sig) +{ + unsigned int __eax, __ebx, __ecx, __edx; +#if __i386__ + int __cpuid_supported; + + __asm(" pushfl\n" + " popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x00200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " movl $0,%0\n" + " cmpl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + "1:" + : "=r" (__cpuid_supported) : : "eax", "ecx"); + if (!__cpuid_supported) + return 0; +#endif + + __cpuid(__level, __eax, __ebx, __ecx, __edx); + if (__sig) + *__sig = __ebx; + return __eax; +} diff --git a/python/clang_includes/emmintrin.h b/python/clang_includes/emmintrin.h index 56c6c228..b3f85695 100644 --- a/python/clang_includes/emmintrin.h +++ b/python/clang_includes/emmintrin.h @@ -245,13 +245,15 @@ _mm_cmple_sd(__m128d __a, __m128d __b) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__b, __a, 1); + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1); + return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__b, __a, 2); + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2); + return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) @@ -287,13 +289,15 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b) static __inline__ __m128d __attribute__((__always_inline__, 
__nodebug__)) _mm_cmpngt_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__b, __a, 5); + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5); + return (__m128d) { __c[0], __a[1] }; } static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_sd(__m128d __a, __m128d __b) { - return (__m128d)__builtin_ia32_cmpsd(__b, __a, 6); + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6); + return (__m128d) { __c[0], __a[1] }; } static __inline__ int __attribute__((__always_inline__, __nodebug__)) @@ -822,7 +826,9 @@ _mm_xor_si128(__m128i __a, __m128i __b) } #define _mm_slli_si128(a, count) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); }) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) @@ -887,7 +893,9 @@ _mm_sra_epi32(__m128i __a, __m128i __count) #define _mm_srli_si128(a, count) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); }) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) @@ -1210,6 +1218,14 @@ _mm_stream_si32(int *__p, int __a) __builtin_ia32_movnti(__p, __a); } +#ifdef __x86_64__ +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_si64(long long *__p, long long __a) +{ + __builtin_ia32_movnti64(__p, __a); +} +#endif + static __inline__ void __attribute__((__always_inline__, __nodebug__)) _mm_clflush(void const *__p) { @@ -1250,7 +1266,7 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__)) _mm_extract_epi16(__m128i __a, int __imm) { __v8hi __b = (__v8hi)__a; - return (unsigned short)__b[__imm]; + return (unsigned short)__b[__imm & 7]; } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) @@ -1268,20 +1284,26 @@ _mm_movemask_epi8(__m128i __a) } #define _mm_shuffle_epi32(a, imm) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \ (imm) & 0x3, ((imm) & 0xc) >> 2, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ (imm) & 0x3, ((imm) & 0xc) >> 2, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 4, 5, 6, 7); }) #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ 0, 1, 2, 3, \ 4 + (((imm) & 0x03) >> 0), \ @@ -1344,7 +1366,7 @@ _mm_movepi64_pi64(__m128i __a) } static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) -_mm_movpi64_pi64(__m64 __a) +_mm_movpi64_epi64(__m64 __a) { return (__m128i){ (long long)__a, 0 }; } @@ -1374,8 +1396,10 @@ _mm_movemask_pd(__m128d __a) } #define _mm_shuffle_pd(a, b, i) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ __m128d __a = 
(a); \ __m128d __b = (b); \ + _Pragma("clang diagnostic pop"); \ __builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); }) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) diff --git a/python/clang_includes/f16cintrin.h b/python/clang_includes/f16cintrin.h index a6d7812a..f3614c0e 100644 --- a/python/clang_includes/f16cintrin.h +++ b/python/clang_includes/f16cintrin.h @@ -1,6 +1,6 @@ -/*===---- f16cintrin.h - F16C intrinsics ---------------------------------=== +/*===---- f16cintrin.h - F16C intrinsics -----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining __a copy + * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell diff --git a/python/clang_includes/immintrin.h b/python/clang_includes/immintrin.h index fea7c3ba..15d6e05f 100644 --- a/python/clang_includes/immintrin.h +++ b/python/clang_includes/immintrin.h @@ -111,4 +111,8 @@ _xtest(void) } #endif +#ifdef __SHA__ +#include +#endif + #endif /* __IMMINTRIN_H */ diff --git a/python/clang_includes/limits.h b/python/clang_includes/limits.h index ecd09a4a..91bd4046 100644 --- a/python/clang_includes/limits.h +++ b/python/clang_includes/limits.h @@ -87,8 +87,10 @@ #define CHAR_MAX __SCHAR_MAX__ #endif -/* C99 5.2.4.2.1: Added long long. */ -#if __STDC_VERSION__ >= 199901 +/* C99 5.2.4.2.1: Added long long. + C++11 18.3.3.2: same contents as the Standard C Library header . + */ +#if __STDC_VERSION__ >= 199901 || __cplusplus >= 201103L #undef LLONG_MIN #undef LLONG_MAX diff --git a/python/clang_includes/module.map b/python/clang_includes/module.map index aa219cb4..9f7944de 100644 --- a/python/clang_includes/module.map +++ b/python/clang_includes/module.map @@ -4,6 +4,16 @@ module _Builtin_intrinsics [system] { header "altivec.h" } + explicit module arm { + requires arm + + explicit module neon { + requires neon + header "arm_neon.h" + export * + } + } + explicit module intel { requires x86 export * @@ -34,6 +44,7 @@ module _Builtin_intrinsics [system] { explicit module sse { requires sse export mmx + export * // note: for hackish dependency header "xmmintrin.h" } diff --git a/python/clang_includes/prfchwintrin.h b/python/clang_includes/prfchwintrin.h index 2d529c66..9825bd8c 100644 --- a/python/clang_includes/prfchwintrin.h +++ b/python/clang_includes/prfchwintrin.h @@ -25,6 +25,9 @@ #error "Never use directly; include or instead." #endif +#ifndef __PRFCHWINTRIN_H +#define __PRFCHWINTRIN_H + #if defined(__PRFCHW__) || defined(__3dNOW__) static __inline__ void __attribute__((__always_inline__, __nodebug__)) _m_prefetchw(void *__P) @@ -32,3 +35,5 @@ _m_prefetchw(void *__P) __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); } #endif + +#endif /* __PRFCHWINTRIN_H */ diff --git a/python/clang_includes/rdseedintrin.h b/python/clang_includes/rdseedintrin.h index 54aabd17..0fef1fa4 100644 --- a/python/clang_includes/rdseedintrin.h +++ b/python/clang_includes/rdseedintrin.h @@ -25,6 +25,9 @@ #error "Never use directly; include instead." 
#endif +#ifndef __RDSEEDINTRIN_H +#define __RDSEEDINTRIN_H + #ifdef __RDSEED__ static __inline__ int __attribute__((__always_inline__, __nodebug__)) _rdseed16_step(unsigned short *__p) @@ -46,3 +49,4 @@ _rdseed64_step(unsigned long long *__p) } #endif #endif /* __RDSEED__ */ +#endif /* __RDSEEDINTRIN_H */ diff --git a/python/clang_includes/rtmintrin.h b/python/clang_includes/rtmintrin.h index bdc2b994..26149ca8 100644 --- a/python/clang_includes/rtmintrin.h +++ b/python/clang_includes/rtmintrin.h @@ -25,6 +25,9 @@ #error "Never use directly; include instead." #endif +#ifndef __RTMINTRIN_H +#define __RTMINTRIN_H + #define _XBEGIN_STARTED (~0u) #define _XABORT_EXPLICIT (1 << 0) #define _XABORT_RETRY (1 << 1) @@ -47,3 +50,5 @@ _xend(void) } #define _xabort(imm) __builtin_ia32_xabort((imm)) + +#endif /* __RTMINTRIN_H */ diff --git a/python/clang_includes/shaintrin.h b/python/clang_includes/shaintrin.h new file mode 100644 index 00000000..66ed0554 --- /dev/null +++ b/python/clang_includes/shaintrin.h @@ -0,0 +1,74 @@ +/*===---- shaintrin.h - SHA intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __SHAINTRIN_H +#define __SHAINTRIN_H + +#if !defined (__SHA__) +# error "SHA instructions not enabled" +#endif + +#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \ + __builtin_ia32_sha1rnds4((V1), (V2), (M)); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha1nexte_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha1nexte(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha1msg1_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha1msg1(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha1msg2_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha1msg2(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z) +{ + return __builtin_ia32_sha256rnds2(__X, __Y, __Z); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha256msg1_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha256msg1(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha256msg2_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha256msg2(__X, __Y); +} + +#endif /* __SHAINTRIN_H */ diff --git a/python/clang_includes/smmintrin.h b/python/clang_includes/smmintrin.h index 498f6f0d..53b3ccb4 100644 --- a/python/clang_includes/smmintrin.h +++ b/python/clang_includes/smmintrin.h @@ -197,7 +197,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) #define _mm_extract_ps(X, N) (__extension__ \ ({ union { int __i; float __f; } __t; \ __v4sf __a = (__v4sf)(X); \ - __t.__f = __a[N]; \ + __t.__f = __a[(N) & 3]; \ __t.__i;})) /* Miscellaneous insert and extract macros. */ @@ -215,14 +215,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /* Insert int into packed integer array at index. */ #define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ - __a[(N)] = (I); \ + __a[(N) & 15] = (I); \ __a;})) #define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ - __a[(N)] = (I); \ + __a[(N) & 3] = (I); \ __a;})) #ifdef __x86_64__ #define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ - __a[(N)] = (I); \ + __a[(N) & 1] = (I); \ __a;})) #endif /* __x86_64__ */ @@ -230,12 +230,13 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) * as a zero extended value, so it is unsigned. */ #define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ - (unsigned char)__a[(N)];})) + (int)(unsigned char) \ + __a[(N) & 15];})) #define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ - (unsigned)__a[(N)];})) + __a[(N) & 3];})) #ifdef __x86_64__ #define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ - __a[(N)];})) + __a[(N) & 1];})) #endif /* __x86_64 */ /* SSE4 128-bit Packed Integer Comparisons. 
*/ diff --git a/python/clang_includes/tbmintrin.h b/python/clang_includes/tbmintrin.h new file mode 100644 index 00000000..f95e34fb --- /dev/null +++ b/python/clang_includes/tbmintrin.h @@ -0,0 +1,158 @@ +/*===---- tbmintrin.h - TBM intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __TBM__ +#error "TBM instruction set is not enabled" +#endif + +#ifndef __X86INTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __TBMINTRIN_H +#define __TBMINTRIN_H + +#define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b))) + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcfill_u32(unsigned int a) +{ + return a & (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blci_u32(unsigned int a) +{ + return a | ~(a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcic_u32(unsigned int a) +{ + return ~a & (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcmsk_u32(unsigned int a) +{ + return a ^ (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcs_u32(unsigned int a) +{ + return a | (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blsfill_u32(unsigned int a) +{ + return a | (a - 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blsic_u32(unsigned int a) +{ + return ~a | (a - 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__t1mskc_u32(unsigned int a) +{ + return ~a | (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__tzmsk_u32(unsigned int a) +{ + return ~a & (a - 1); +} + +#ifdef __x86_64__ +#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b))) + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blcfill_u64(unsigned long long a) +{ + return a & (a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blci_u64(unsigned long long a) +{ + return a | ~(a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blcic_u64(unsigned long long a) +{ + return ~a & (a + 1); +} + 
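/* Worked example of the TBM bit identities defined above, on a hypothetical
 * input. The expressions mirror the intrinsic bodies, so this sketch does not
 * need -mtbm; the helper name is illustrative and not part of the header. */
static __inline__ int
__tbm_identity_sketch(void)
{
  unsigned int a = 0x58;                   /* binary 0101 1000 */
  return (a | (a - 1)) == 0x5F &&          /* __blsfill_u32: fill below the lowest set bit   */
         (~a & (a - 1)) == 0x07 &&         /* __tzmsk_u32:  mask of the trailing zero bits   */
         (a ^ (a + 1)) == 0x01 &&          /* __blcmsk_u32: mask through the lowest clear bit */
         (~a & (a + 1)) == 0x01;           /* __blcic_u32:  isolate the lowest clear bit     */
}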
+static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blcmsk_u64(unsigned long long a) +{ + return a ^ (a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blcs_u64(unsigned long long a) +{ + return a | (a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blsfill_u64(unsigned long long a) +{ + return a | (a - 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blsic_u64(unsigned long long a) +{ + return ~a | (a - 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__t1mskc_u64(unsigned long long a) +{ + return ~a | (a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__tzmsk_u64(unsigned long long a) +{ + return ~a & (a - 1); +} +#endif + +#endif /* __TBMINTRIN_H */ diff --git a/python/clang_includes/tgmath.h b/python/clang_includes/tgmath.h index 4fa1cf72..a48e267e 100644 --- a/python/clang_includes/tgmath.h +++ b/python/clang_includes/tgmath.h @@ -1340,15 +1340,15 @@ static long double _Complex // creal -static float _Complex +static float _TG_ATTRS __tg_creal(float __x) {return __x;} -static double _Complex +static double _TG_ATTRS __tg_creal(double __x) {return __x;} -static long double _Complex +static long double _TG_ATTRS __tg_creal(long double __x) {return __x;} diff --git a/python/clang_includes/unwind.h b/python/clang_includes/unwind.h index e94fd709..685c1dfd 100644 --- a/python/clang_includes/unwind.h +++ b/python/clang_includes/unwind.h @@ -27,8 +27,8 @@ #define __CLANG_UNWIND_H #if __has_include_next() -/* Darwin and libunwind provide an unwind.h. If that's available, use - * it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, +/* Darwin (from 11.x on) and libunwind provide an unwind.h. If that's available, + * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, * so define that around the include.*/ # ifndef _GNU_SOURCE # define _SHOULD_UNDEFINE_GNU_SOURCE @@ -66,7 +66,17 @@ extern "C" { #pragma GCC visibility push(default) #endif +typedef uintptr_t _Unwind_Word; +typedef intptr_t _Unwind_Sword; +typedef uintptr_t _Unwind_Ptr; +typedef uintptr_t _Unwind_Internal_Ptr; +typedef uint64_t _Unwind_Exception_Class; + +typedef intptr_t _sleb128_t; +typedef uintptr_t _uleb128_t; + struct _Unwind_Context; +struct _Unwind_Exception; typedef enum { _URC_NO_REASON = 0, _URC_FOREIGN_EXCEPTION_CAUGHT = 1, @@ -81,8 +91,43 @@ typedef enum { _URC_CONTINUE_UNWIND = 8 } _Unwind_Reason_Code; +typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, -#ifdef __arm__ + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */ +} _Unwind_Action; + +typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code, + struct _Unwind_Exception *); + +struct _Unwind_Exception { + _Unwind_Exception_Class exception_class; + _Unwind_Exception_Cleanup_Fn exception_cleanup; + _Unwind_Word private_1; + _Unwind_Word private_2; + /* The Itanium ABI requires that _Unwind_Exception objects are "double-word + * aligned". GCC has interpreted this to mean "use the maximum useful + * alignment for the target"; so do we. 
*/ +} __attribute__((__aligned__)); + +typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, + _Unwind_Exception_Class, + struct _Unwind_Exception *, + struct _Unwind_Context *, + void *); + +typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)( + int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *, + struct _Unwind_Context *); +typedef _Unwind_Personality_Fn __personality_routine; + +typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, + void *); + +#if defined(__arm__) && !defined(__APPLE__) typedef enum { _UVRSC_CORE = 0, /* integer register */ @@ -111,14 +156,116 @@ _Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context, _Unwind_VRS_DataRepresentation __representation, void *__valuep); -#else +_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context, + _Unwind_VRS_RegClass __regclass, + uint32_t __regno, + _Unwind_VRS_DataRepresentation __representation, + void *__valuep); -uintptr_t _Unwind_GetIP(struct _Unwind_Context* __context); +static __inline__ +_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) { + _Unwind_Word __value; + _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); + return __value; +} + +static __inline__ +void _Unwind_SetGR(struct _Unwind_Context *__context, int __index, + _Unwind_Word __value) { + _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); +} + +static __inline__ +_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) { + _Unwind_Word __ip = _Unwind_GetGR(__context, 15); + return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */ +} + +static __inline__ +void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) { + _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1; + _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit); +} +#else +_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int); +void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word); + +_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *); +void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word); +#endif + + +_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *); + +_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *); + +void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *); + +_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); + +/* DWARF EH functions; currently not available on Darwin/ARM */ +#if !defined(__APPLE__) || !defined(__arm__) + +_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); +void _Unwind_DeleteException(struct _Unwind_Exception *); +void _Unwind_Resume(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *); + +#endif + +_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + +/* setjmp(3)/longjmp(3) stuff */ +typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t; + +void _Unwind_SjLj_Register(_Unwind_FunctionContext_t); +void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t); +_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); +void _Unwind_SjLj_Resume(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *); + +void *_Unwind_FindEnclosingFunction(void *); + +#ifdef 
__APPLE__ + +_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *) + __attribute__((unavailable)); +_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *) + __attribute__((unavailable)); + +/* Darwin-specific functions */ +void __register_frame(const void *); +void __deregister_frame(const void *); + +struct dwarf_eh_bases { + uintptr_t tbase; + uintptr_t dbase; + uintptr_t func; +}; +void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *); + +void __register_frame_info_bases(const void *, void *, void *, void *) + __attribute__((unavailable)); +void __register_frame_info(const void *, void *) __attribute__((unavailable)); +void __register_frame_info_table_bases(const void *, void*, void *, void *) + __attribute__((unavailable)); +void __register_frame_info_table(const void *, void *) + __attribute__((unavailable)); +void __register_frame_table(const void *) __attribute__((unavailable)); +void __deregister_frame_info(const void *) __attribute__((unavailable)); +void __deregister_frame_info_bases(const void *)__attribute__((unavailable)); + +#else + +_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *); +_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *); #endif -typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context*, void*); -_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void*); #ifndef HIDE_EXPORTS #pragma GCC visibility pop diff --git a/python/clang_includes/x86intrin.h b/python/clang_includes/x86intrin.h index 94fbe2fe..399016f1 100644 --- a/python/clang_includes/x86intrin.h +++ b/python/clang_includes/x86intrin.h @@ -66,6 +66,10 @@ #include #endif +#ifdef __TBM__ +#include +#endif + #ifdef __F16C__ #include #endif diff --git a/python/clang_includes/xmmintrin.h b/python/clang_includes/xmmintrin.h index 8c5fc952..c68d3ed7 100644 --- a/python/clang_includes/xmmintrin.h +++ b/python/clang_includes/xmmintrin.h @@ -218,7 +218,9 @@ _mm_cmple_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpgt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__b, __a, 1); + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 1), + 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -230,7 +232,9 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpge_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__b, __a, 2); + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 2), + 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -278,7 +282,9 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpngt_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__b, __a, 5); + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 5), + 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -290,7 +296,9 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_cmpnge_ss(__m128 __a, __m128 __b) { - return (__m128)__builtin_ia32_cmpss(__b, __a, 6); + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 6), + 4, 1, 2, 3); } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -983,12 +991,10 @@ do { \ #define _m_ _mm_ #define _m_ _mm_ -#if 
!__has_feature(modules) /* Ugly hack for backwards-compatibility (compatible with gcc) */ #ifdef __SSE2__ #include #endif -#endif #endif /* __SSE__ */ diff --git a/python/clang_includes/xopintrin.h b/python/clang_includes/xopintrin.h index 9a5824c9..cc94ca02 100644 --- a/python/clang_includes/xopintrin.h +++ b/python/clang_includes/xopintrin.h @@ -342,6 +342,399 @@ _mm_sha_epi64(__m128i __A, __m128i __B) __m128i __B = (B); \ (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) +#define _MM_PCOMCTRL_LT 0 +#define _MM_PCOMCTRL_LE 1 +#define _MM_PCOMCTRL_GT 2 +#define _MM_PCOMCTRL_GE 3 +#define _MM_PCOMCTRL_EQ 4 +#define _MM_PCOMCTRL_NEQ 5 +#define _MM_PCOMCTRL_FALSE 6 +#define _MM_PCOMCTRL_TRUE 7 + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); 
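  /*
   * A minimal usage sketch for the _mm_com*_epu* wrappers defined in this file
   * (assumes -mxop and inclusion via <x86intrin.h>; values are hypothetical):
   *
   *   __m128i __a  = _mm_set1_epi8(3);
   *   __m128i __b  = _mm_set1_epi8(5);
   *   __m128i __lt = _mm_comlt_epu8(__a, __b);    // every byte lane is 0xFF (true mask)
   *   __m128i __f  = _mm_comfalse_epu8(__a, __b); // always all-zero lanes
   */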
+} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, 
_MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epi32(__m128i __A, __m128i __B) +{ + return 
_mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); +} + #define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ __m128d __X = (X); \ __m128d __Y = (Y); \