Updating to clang 3.4 builtin headers

This commit is contained in:
Strahinja Val Markovic 2014-01-11 12:26:29 -08:00
parent 2ebea627d9
commit f30857df22
20 changed files with 1806 additions and 55 deletions

View File

@ -0,0 +1,784 @@
/* ===-------- Intrin.h ---------------------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
/* Only include this if we're compiling for the windows platform. */
#ifndef _MSC_VER
#include_next <Intrin.h>
#else
#ifndef __INTRIN_H
#define __INTRIN_H
/* First include the standard intrinsics. */
#include <x86intrin.h>
#ifdef __cplusplus
extern "C" {
#endif
/* And the random ones that aren't in those files. */
__m64 _m_from_float(float);
__m64 _m_from_int(int _l);
void _m_prefetch(void *);
float _m_to_float(__m64);
int _m_to_int(__m64 _M);
/* Other assorted instruction intrinsics. */
void __addfsbyte(unsigned long, unsigned char);
void __addfsdword(unsigned long, unsigned long);
void __addfsword(unsigned long, unsigned short);
void __code_seg(const char *);
void __cpuid(int[4], int);
void __cpuidex(int[4], int, int);
void __debugbreak(void);
__int64 __emul(int, int);
unsigned __int64 __emulu(unsigned int, unsigned int);
void __cdecl __fastfail(unsigned int);
unsigned int __getcallerseflags(void);
void __halt(void);
unsigned char __inbyte(unsigned short);
void __inbytestring(unsigned short, unsigned char *, unsigned long);
void __incfsbyte(unsigned long);
void __incfsdword(unsigned long);
void __incfsword(unsigned long);
unsigned long __indword(unsigned short);
void __indwordstring(unsigned short, unsigned long *, unsigned long);
void __int2c(void);
void __invlpg(void *);
unsigned short __inword(unsigned short);
void __inwordstring(unsigned short, unsigned short *, unsigned long);
void __lidt(void *);
unsigned __int64 __ll_lshift(unsigned __int64, int);
__int64 __ll_rshift(__int64, int);
void __llwpcb(void *);
unsigned char __lwpins32(unsigned int, unsigned int, unsigned int);
void __lwpval32(unsigned int, unsigned int, unsigned int);
unsigned int __lzcnt(unsigned int);
unsigned short __lzcnt16(unsigned short);
void __movsb(unsigned char *, unsigned char const *, size_t);
void __movsd(unsigned long *, unsigned long const *, size_t);
void __movsw(unsigned short *, unsigned short const *, size_t);
void __nop(void);
void __nvreg_restore_fence(void);
void __nvreg_save_fence(void);
void __outbyte(unsigned short, unsigned char);
void __outbytestring(unsigned short, unsigned char *, unsigned long);
void __outdword(unsigned short, unsigned long);
void __outdwordstring(unsigned short, unsigned long *, unsigned long);
void __outword(unsigned short, unsigned short);
void __outwordstring(unsigned short, unsigned short *, unsigned long);
static __inline__
unsigned int __popcnt(unsigned int);
static __inline__
unsigned short __popcnt16(unsigned short);
unsigned __int64 __rdtsc(void);
unsigned __int64 __rdtscp(unsigned int *);
unsigned long __readcr0(void);
unsigned long __readcr2(void);
unsigned long __readcr3(void);
unsigned long __readcr5(void);
unsigned long __readcr8(void);
unsigned int __readdr(unsigned int);
unsigned int __readeflags(void);
unsigned char __readfsbyte(unsigned long);
unsigned long __readfsdword(unsigned long);
unsigned __int64 __readfsqword(unsigned long);
unsigned short __readfsword(unsigned long);
unsigned __int64 __readmsr(unsigned long);
unsigned __int64 __readpmc(unsigned long);
unsigned long __segmentlimit(unsigned long);
void __sidt(void *);
void *__slwpcb(void);
void __stosb(unsigned char *, unsigned char, size_t);
void __stosd(unsigned long *, unsigned long, size_t);
void __stosw(unsigned short *, unsigned short, size_t);
void __svm_clgi(void);
void __svm_invlpga(void *, int);
void __svm_skinit(int);
void __svm_stgi(void);
void __svm_vmload(size_t);
void __svm_vmrun(size_t);
void __svm_vmsave(size_t);
void __ud2(void);
unsigned __int64 __ull_rshift(unsigned __int64, int);
void __vmx_off(void);
void __vmx_vmptrst(unsigned __int64 *);
void __wbinvd(void);
void __writecr0(unsigned int);
void __writecr3(unsigned int);
void __writecr4(unsigned int);
void __writecr8(unsigned int);
void __writedr(unsigned int, unsigned int);
void __writeeflags(unsigned int);
void __writefsbyte(unsigned long, unsigned char);
void __writefsdword(unsigned long, unsigned long);
void __writefsqword(unsigned long, unsigned __int64);
void __writefsword(unsigned long, unsigned short);
void __writemsr(unsigned long, unsigned __int64);
static __inline__
void *_AddressOfReturnAddress(void);
unsigned int _andn_u32(unsigned int, unsigned int);
unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
unsigned int _bextri_u32(unsigned int, unsigned int);
static __inline__
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
static __inline__
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
static __inline__
unsigned char _bittest(long const *, long);
static __inline__
unsigned char _bittestandcomplement(long *, long);
static __inline__
unsigned char _bittestandreset(long *, long);
static __inline__
unsigned char _bittestandset(long *, long);
unsigned int _blcfill_u32(unsigned int);
unsigned int _blci_u32(unsigned int);
unsigned int _blcic_u32(unsigned int);
unsigned int _blcmsk_u32(unsigned int);
unsigned int _blcs_u32(unsigned int);
unsigned int _blsfill_u32(unsigned int);
unsigned int _blsi_u32(unsigned int);
unsigned int _blsic_u32(unsigned int);
unsigned int _blsmsk_u32(unsigned int);
unsigned int _blsmsk_u32(unsigned int);
unsigned int _blsr_u32(unsigned int);
unsigned int _blsr_u32(unsigned int);
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
unsigned long __cdecl _byteswap_ulong(unsigned long);
unsigned short __cdecl _byteswap_ushort(unsigned short);
unsigned _bzhi_u32(unsigned int, unsigned int);
void __cdecl _disable(void);
void __cdecl _enable(void);
void __cdecl _fxrstor(void const *);
void __cdecl _fxsave(void *);
long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);
static __inline__
long _InterlockedAnd(long volatile *_Value, long _Mask);
static __inline__
short _InterlockedAnd16(short volatile *_Value, short _Mask);
static __inline__
char _InterlockedAnd8(char volatile *_Value, char _Mask);
unsigned char _interlockedbittestandreset(long volatile *, long);
unsigned char _interlockedbittestandset(long volatile *, long);
static __inline__
long __cdecl _InterlockedCompareExchange(long volatile *_Destination,
long _Exchange, long _Comparand);
long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long);
long _InterlockedCompareExchange_HLERelease(long volatile *, long, long);
static __inline__
short _InterlockedCompareExchange16(short volatile *_Destination,
short _Exchange, short _Comparand);
static __inline__
__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64,
__int64);
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
__int64);
static __inline__
char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange,
char _Comparand);
void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,
void *);
void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,
void *);
static __inline__
long __cdecl _InterlockedDecrement(long volatile *_Addend);
static __inline__
short _InterlockedDecrement16(short volatile *_Addend);
static __inline__
long __cdecl _InterlockedExchange(long volatile *_Target, long _Value);
static __inline__
short _InterlockedExchange16(short volatile *_Target, short _Value);
static __inline__
char _InterlockedExchange8(char volatile *_Target, char _Value);
static __inline__
long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value);
long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);
long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
static __inline__
char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value);
static __inline__
long __cdecl _InterlockedIncrement(long volatile *_Addend);
static __inline__
short _InterlockedIncrement16(short volatile *_Addend);
static __inline__
long _InterlockedOr(long volatile *_Value, long _Mask);
static __inline__
short _InterlockedOr16(short volatile *_Value, short _Mask);
static __inline__
char _InterlockedOr8(char volatile *_Value, char _Mask);
static __inline__
long _InterlockedXor(long volatile *_Value, long _Mask);
static __inline__
short _InterlockedXor16(short volatile *_Value, short _Mask);
static __inline__
char _InterlockedXor8(char volatile *_Value, char _Mask);
void __cdecl _invpcid(unsigned int, void *);
static __inline__
unsigned long __cdecl _lrotl(unsigned long, int);
static __inline__
unsigned long __cdecl _lrotr(unsigned long, int);
static __inline__
unsigned int _lzcnt_u32(unsigned int);
static __inline__
void _ReadBarrier(void);
static __inline__
void _ReadWriteBarrier(void);
static __inline__
void *_ReturnAddress(void);
unsigned int _rorx_u32(unsigned int, const unsigned int);
int __cdecl _rdrand16_step(unsigned short *);
int __cdecl _rdrand32_step(unsigned int *);
static __inline__
unsigned int __cdecl _rotl(unsigned int _Value, int _Shift);
static __inline__
unsigned short _rotl16(unsigned short _Value, unsigned char _Shift);
static __inline__
unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift);
static __inline__
unsigned char _rotl8(unsigned char _Value, unsigned char _Shift);
static __inline__
unsigned int __cdecl _rotr(unsigned int _Value, int _Shift);
static __inline__
unsigned short _rotr16(unsigned short _Value, unsigned char _Shift);
static __inline__
unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift);
static __inline__
unsigned char _rotr8(unsigned char _Value, unsigned char _Shift);
int _sarx_i32(int, unsigned int);
/* FIXME: Need definition for jmp_buf.
int __cdecl _setjmp(jmp_buf); */
unsigned int _shlx_u32(unsigned int, unsigned int);
unsigned int _shrx_u32(unsigned int, unsigned int);
void _Store_HLERelease(long volatile *, long);
void _Store64_HLERelease(__int64 volatile *, __int64);
void _StorePointer_HLERelease(void *volatile *, void *);
unsigned int _t1mskc_u32(unsigned int);
unsigned int _tzcnt_u32(unsigned int);
unsigned int _tzcnt_u32(unsigned int);
unsigned int _tzmsk_u32(unsigned int);
static __inline__
void _WriteBarrier(void);
void _xabort(const unsigned int imm);
unsigned __int32 xbegin(void);
void _xend(void);
unsigned __int64 __cdecl _xgetbv(unsigned int);
void __cdecl _xrstor(void const *, unsigned __int64);
void __cdecl _xsave(void *, unsigned __int64);
void __cdecl _xsaveopt(void *, unsigned __int64);
void __cdecl _xsetbv(unsigned int, unsigned __int64);
unsigned char _xtest(void);
/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
#ifdef __x86_64__
void __addgsbyte(unsigned long, unsigned char);
void __addgsdword(unsigned long, unsigned long);
void __addgsqword(unsigned long, unsigned __int64);
void __addgsword(unsigned long, unsigned short);
void __faststorefence(void);
void __incgsbyte(unsigned long);
void __incgsdword(unsigned long);
void __incgsqword(unsigned long);
void __incgsword(unsigned long);
unsigned __int64 __popcnt64(unsigned __int64);
unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
unsigned __int64 _HighPart,
unsigned char _Shift);
unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
unsigned __int64 _HighPart,
unsigned char _Shift);
void __stosq(unsigned __int64 *, unsigned __int64, size_t);
unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64);
unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int);
unsigned __int64 _bextri_u64(unsigned __int64, unsigned int);
static __inline__
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
static __inline__
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
static __inline__
unsigned char _bittest64(__int64 const *, __int64);
static __inline__
unsigned char _bittestandcomplement64(__int64 *, __int64);
static __inline__
unsigned char _bittestandreset64(__int64 *, __int64);
static __inline__
unsigned char _bittestandset64(__int64 *, __int64);
unsigned __int64 _blcfill_u64(unsigned __int64);
unsigned __int64 _blci_u64(unsigned __int64);
unsigned __int64 _blcic_u64(unsigned __int64);
unsigned __int64 _blcmsk_u64(unsigned __int64);
unsigned __int64 _blcs_u64(unsigned __int64);
unsigned __int64 _blsfill_u64(unsigned __int64);
unsigned __int64 _blsi_u64(unsigned __int64);
unsigned __int64 _blsic_u64(unsigned __int64);
unsigned __int64 _blmsk_u64(unsigned __int64);
unsigned __int64 _blsr_u64(unsigned __int64);
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int);
void __cdecl _fxrstor64(void const *);
void __cdecl _fxsave64(void *);
long _InterlockedAnd_np(long volatile *_Value, long _Mask);
short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
long _Comparand);
unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_CompareandResult);
unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
__int64 *_ComparandResult);
short _InterlockedCompareExchange16_np(short volatile *_Destination,
short _Exchange, short _Comparand);
__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,
void *_Exchange, void *_Comparand);
long _InterlockedOr_np(long volatile *_Value, long _Mask);
short _InterlockedOr16_np(short volatile *_Value, short _Mask);
__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);
char _InterlockedOr8_np(char volatile *_Value, char _Mask);
long _InterlockedXor_np(long volatile *_Value, long _Mask);
short _InterlockedXor16_np(short volatile *_Value, short _Mask);
__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);
char _InterlockedXor8_np(char volatile *_Value, char _Mask);
unsigned __int64 _lzcnt_u64(unsigned __int64);
__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand,
__int64 *_HighProduct);
unsigned int __cdecl _readfsbase_u32(void);
unsigned __int64 __cdecl _readfsbase_u64(void);
unsigned int __cdecl _readgsbase_u32(void);
unsigned __int64 __cdecl _readgsbase_u64(void);
unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);
unsigned __int64 _tzcnt_u64(unsigned __int64);
unsigned __int64 _tzmsk_u64(unsigned __int64);
unsigned __int64 _umul128(unsigned __int64 _Multiplier,
unsigned __int64 _Multiplicand,
unsigned __int64 *_HighProduct);
void __cdecl _writefsbase_u32(unsigned int);
void _cdecl _writefsbase_u64(unsigned __int64);
void __cdecl _writegsbase_u32(unsigned int);
void __cdecl _writegsbase_u64(unsigned __int64);
void __cdecl _xrstor64(void const *, unsigned __int64);
void __cdecl _xsave64(void *, unsigned __int64);
void __cdecl _xsaveopt64(void *, unsigned __int64);
#endif /* __x86_64__ */
/*----------------------------------------------------------------------------*\
|* Bit Twiddling
\*----------------------------------------------------------------------------*/
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_rotl8(unsigned char _Value, unsigned char _Shift) {
_Shift &= 0x7;
return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_rotr8(unsigned char _Value, unsigned char _Shift) {
_Shift &= 0x7;
return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value;
}
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
_rotl16(unsigned short _Value, unsigned char _Shift) {
_Shift &= 0xf;
return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value;
}
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
_rotr16(unsigned short _Value, unsigned char _Shift) {
_Shift &= 0xf;
return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value;
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_rotl(unsigned int _Value, int _Shift) {
_Shift &= 0x1f;
return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_rotr(unsigned int _Value, int _Shift) {
_Shift &= 0x1f;
return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
_lrotl(unsigned long _Value, int _Shift) {
_Shift &= 0x1f;
return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
}
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
_lrotr(unsigned long _Value, int _Shift) {
_Shift &= 0x1f;
return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
}
static
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
_rotl64(unsigned __int64 _Value, int _Shift) {
_Shift &= 0x3f;
return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value;
}
static
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
_rotr64(unsigned __int64 _Value, int _Shift) {
_Shift &= 0x3f;
return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value;
}
/*----------------------------------------------------------------------------*\
|* Bit Counting and Testing
\*----------------------------------------------------------------------------*/
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanForward(unsigned long *_Index, unsigned long _Mask) {
if (!_Mask)
return 0;
*_Index = __builtin_ctzl(_Mask);
return 1;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanReverse(unsigned long *_Index, unsigned long _Mask) {
if (!_Mask)
return 0;
*_Index = 31 - __builtin_clzl(_Mask);
return 1;
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_lzcnt_u32(unsigned int a) {
if (!a)
return 32;
return __builtin_clzl(a);
}
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__popcnt16(unsigned short value) {
return __builtin_popcount((int)value);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__popcnt(unsigned int value) {
return __builtin_popcount(value);
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittest(long const *a, long b) {
return (*a >> b) & 1;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandcomplement(long *a, long b) {
unsigned char x = (*a >> b) & 1;
*a = *a ^ (1 << b);
return x;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandreset(long *a, long b) {
unsigned char x = (*a >> b) & 1;
*a = *a & ~(1 << b);
return x;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandset(long *a, long b) {
unsigned char x = (*a >> b) & 1;
*a = *a | (1 << b);
return x;
}
#ifdef __x86_64__
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) {
if (!_Mask)
return 0;
*_Index = __builtin_ctzll(_Mask);
return 1;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
if (!_Mask)
return 0;
*_Index = 63 - __builtin_clzll(_Mask);
return 1;
}
static
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
_lzcnt_u64(unsigned __int64 a) {
if (!a)
return 64;
return __builtin_clzll(a);
}
static __inline__
unsigned __int64 __attribute__((__always_inline__, __nodebug__))
__popcnt64(unsigned __int64 value) {
return __builtin_popcountll(value);
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittest64(__int64 const *a, __int64 b) {
return (*a >> b) & 1;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandcomplement64(__int64 *a, __int64 b) {
unsigned char x = (*a >> b) & 1;
*a = *a ^ (1ll << b);
return x;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandreset64(__int64 *a, __int64 b) {
unsigned char x = (*a >> b) & 1;
*a = *a & ~(1ll << b);
return x;
}
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
_bittestandset64(__int64 *a, __int64 b) {
unsigned char x = (*a >> b) & 1;
*a = *a | (1ll << b);
return x;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Add
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) {
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) {
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd(long volatile *_Addend, long _Value) {
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange Sub
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub8(char volatile *_Subend, char _Value) {
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub16(short volatile *_Subend, short _Value) {
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub(long volatile *_Subend, long _Value) {
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Increment
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedIncrement16(char volatile *_Value) {
return __atomic_add_fetch(_Value, 1, 0);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedIncrement(long volatile *_Value) {
return __atomic_add_fetch(_Value, 1, 0);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedIncrement64(__int64 volatile *_Value) {
return __atomic_add_fetch(_Value, 1, 0);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Decrement
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedDecrement16(char volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, 0);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedDecrement(long volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, 0);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedDecrement64(__int64 volatile *_Value) {
return __atomic_sub_fetch(_Value, 1, 0);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked And
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd8(char volatile *_Value, char _Mask) {
return __atomic_and_fetch(_Value, _Mask, 0);
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd16(short volatile *_Value, short _Mask) {
return __atomic_and_fetch(_Value, _Mask, 0);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd(long volatile *_Value, long _Mask) {
return __atomic_and_fetch(_Value, _Mask, 0);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_and_fetch(_Value, _Mask, 0);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Or
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedOr8(char volatile *_Value, char _Mask) {
return __atomic_or_fetch(_Value, _Mask, 0);
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedOr16(short volatile *_Value, short _Mask) {
return __atomic_or_fetch(_Value, _Mask, 0);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedOr(long volatile *_Value, long _Mask) {
return __atomic_or_fetch(_Value, _Mask, 0);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_or_fetch(_Value, _Mask, 0);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Xor
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedXor8(char volatile *_Value, char _Mask) {
return __atomic_xor_fetch(_Value, _Mask, 0);
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedXor16(short volatile *_Value, short _Mask) {
return __atomic_xor_fetch(_Value, _Mask, 0);
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedXor(long volatile *_Value, long _Mask) {
return __atomic_xor_fetch(_Value, _Mask, 0);
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
return __atomic_xor_fetch(_Value, _Mask, 0);
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange8(char volatile *_Target, char _Value) {
__atomic_exchange(_Target, &_Value, &_Value, 0);
return _Value;
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange16(short volatile *_Target, short _Value) {
__atomic_exchange(_Target, &_Value, &_Value, 0);
return _Value;
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange(long volatile *_Target, long _Value) {
__atomic_exchange(_Target, &_Value, &_Value, 0);
return _Value;
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
__atomic_exchange(_Target, &_Value, &_Value, 0);
return _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange
\*----------------------------------------------------------------------------*/
static __inline__ char __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange8(char volatile *_Destination,
char _Exchange, char _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
return _Comparand;
}
static __inline__ short __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange16(short volatile *_Destination,
short _Exchange, short _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
return _Comparand;
}
static __inline__ long __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange(long volatile *_Destination,
long _Exchange, long _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
return _Comparand;
}
#ifdef __x86_64__
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
_InterlockedCompareExchange64(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
return _Comparand;
}
#endif
/*----------------------------------------------------------------------------*\
|* Barriers
\*----------------------------------------------------------------------------*/
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
_ReadWriteBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
_ReadBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
_WriteBarrier(void) {
__asm__ volatile ("" : : : "memory");
}
/*----------------------------------------------------------------------------*\
|* Misc
\*----------------------------------------------------------------------------*/
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
_AddressOfReturnAddress(void) {
return (void*)((char*)__builtin_frame_address(0) + sizeof(void*));
}
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
_ReturnAddress(void) {
return __builtin_return_address(0);
}
#ifdef __cplusplus
}
#endif
#endif /* __INTRIN_H */
#endif /* _MSC_VER */

View File

@ -25,6 +25,9 @@
#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." #error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef __AVX2INTRIN_H
#define __AVX2INTRIN_H
/* SSE4 Multiple Packed Sums of Absolute Difference. */ /* SSE4 Multiple Packed Sums of Absolute Difference. */
#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) #define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
@ -750,9 +753,9 @@ _mm256_broadcastsd_pd(__m128d __X)
} }
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm_broadcastsi128_si256(__m128i const *__a) _mm256_broadcastsi128_si256(__m128i __X)
{ {
return (__m256i)__builtin_ia32_vbroadcastsi256(__a); return (__m256i)__builtin_ia32_vbroadcastsi256(__X);
} }
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ #define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
@ -1058,7 +1061,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ #define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
__m128i __a = (a); \ __m128i __a = (a); \
int const *__m = (m); \ long long const *__m = (m); \
__m128i __i = (i); \ __m128i __i = (i); \
__m128i __mask = (mask); \ __m128i __mask = (mask); \
(__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \ (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \
@ -1066,7 +1069,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ #define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
__m256i __a = (a); \ __m256i __a = (a); \
int const *__m = (m); \ long long const *__m = (m); \
__m128i __i = (i); \ __m128i __i = (i); \
__m256i __mask = (mask); \ __m256i __mask = (mask); \
(__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \ (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \
@ -1074,7 +1077,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ #define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
__m128i __a = (a); \ __m128i __a = (a); \
int const *__m = (m); \ long long const *__m = (m); \
__m128i __i = (i); \ __m128i __i = (i); \
__m128i __mask = (mask); \ __m128i __mask = (mask); \
(__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \ (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \
@ -1082,7 +1085,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ #define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
__m256i __a = (a); \ __m256i __a = (a); \
int const *__m = (m); \ long long const *__m = (m); \
__m256i __i = (i); \ __m256i __i = (i); \
__m256i __mask = (mask); \ __m256i __mask = (mask); \
(__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \ (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \
@ -1173,29 +1176,31 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
(__v4si)_mm_set1_epi32(-1), (s)); }) (__v4si)_mm_set1_epi32(-1), (s)); })
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \ #define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
int const *__m = (m); \ long long const *__m = (m); \
__m128i __i = (i); \ __m128i __i = (i); \
(__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \ (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
(const __v2di *)__m, (__v4si)__i, \ (const __v2di *)__m, (__v4si)__i, \
(__v2di)_mm_set1_epi64x(-1), (s)); }) (__v2di)_mm_set1_epi64x(-1), (s)); })
#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \ #define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
int const *__m = (m); \ long long const *__m = (m); \
__m128i __i = (i); \ __m128i __i = (i); \
(__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \ (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
(const __v4di *)__m, (__v4si)__i, \ (const __v4di *)__m, (__v4si)__i, \
(__v4di)_mm256_set1_epi64x(-1), (s)); }) (__v4di)_mm256_set1_epi64x(-1), (s)); })
#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ #define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
int const *__m = (m); \ long long const *__m = (m); \
__m128i __i = (i); \ __m128i __i = (i); \
(__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \ (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \
(const __v2di *)__m, (__v2di)__i, \ (const __v2di *)__m, (__v2di)__i, \
(__v2di)_mm_set1_epi64x(-1), (s)); }) (__v2di)_mm_set1_epi64x(-1), (s)); })
#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ #define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
int const *__m = (m); \ long long const *__m = (m); \
__m256i __i = (i); \ __m256i __i = (i); \
(__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \ (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \
(const __v4di *)__m, (__v4di)__i, \ (const __v4di *)__m, (__v4di)__i, \
(__v4di)_mm256_set1_epi64x(-1), (s)); }) (__v4di)_mm256_set1_epi64x(-1), (s)); })
#endif /* __AVX2INTRIN_H */

View File

@ -25,6 +25,9 @@
#error "Never use <avxintrin.h> directly; include <immintrin.h> instead." #error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef __AVXINTRIN_H
#define __AVXINTRIN_H
typedef double __v4df __attribute__ ((__vector_size__ (32))); typedef double __v4df __attribute__ ((__vector_size__ (32)));
typedef float __v8sf __attribute__ ((__vector_size__ (32))); typedef float __v8sf __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32))); typedef long long __v4di __attribute__ ((__vector_size__ (32)));
@ -432,21 +435,21 @@ static __inline int __attribute__((__always_inline__, __nodebug__))
_mm256_extract_epi32(__m256i __a, int const __imm) _mm256_extract_epi32(__m256i __a, int const __imm)
{ {
__v8si __b = (__v8si)__a; __v8si __b = (__v8si)__a;
return __b[__imm]; return __b[__imm & 7];
} }
static __inline int __attribute__((__always_inline__, __nodebug__)) static __inline int __attribute__((__always_inline__, __nodebug__))
_mm256_extract_epi16(__m256i __a, int const __imm) _mm256_extract_epi16(__m256i __a, int const __imm)
{ {
__v16hi __b = (__v16hi)__a; __v16hi __b = (__v16hi)__a;
return __b[__imm]; return __b[__imm & 15];
} }
static __inline int __attribute__((__always_inline__, __nodebug__)) static __inline int __attribute__((__always_inline__, __nodebug__))
_mm256_extract_epi8(__m256i __a, int const __imm) _mm256_extract_epi8(__m256i __a, int const __imm)
{ {
__v32qi __b = (__v32qi)__a; __v32qi __b = (__v32qi)__a;
return __b[__imm]; return __b[__imm & 31];
} }
#ifdef __x86_64__ #ifdef __x86_64__
@ -454,7 +457,7 @@ static __inline long long __attribute__((__always_inline__, __nodebug__))
_mm256_extract_epi64(__m256i __a, const int __imm) _mm256_extract_epi64(__m256i __a, const int __imm)
{ {
__v4di __b = (__v4di)__a; __v4di __b = (__v4di)__a;
return __b[__imm]; return __b[__imm & 3];
} }
#endif #endif
@ -1134,22 +1137,19 @@ _mm256_castsi256_si128(__m256i __a)
static __inline __m256d __attribute__((__always_inline__, __nodebug__)) static __inline __m256d __attribute__((__always_inline__, __nodebug__))
_mm256_castpd128_pd256(__m128d __a) _mm256_castpd128_pd256(__m128d __a)
{ {
__m128d __zero = _mm_setzero_pd(); return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
return __builtin_shufflevector(__a, __zero, 0, 1, 2, 2);
} }
static __inline __m256 __attribute__((__always_inline__, __nodebug__)) static __inline __m256 __attribute__((__always_inline__, __nodebug__))
_mm256_castps128_ps256(__m128 __a) _mm256_castps128_ps256(__m128 __a)
{ {
__m128 __zero = _mm_setzero_ps(); return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
return __builtin_shufflevector(__a, __zero, 0, 1, 2, 3, 4, 4, 4, 4);
} }
static __inline __m256i __attribute__((__always_inline__, __nodebug__)) static __inline __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_castsi128_si256(__m128i __a) _mm256_castsi128_si256(__m128i __a)
{ {
__m128i __zero = _mm_setzero_si128(); return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
return __builtin_shufflevector(__a, __zero, 0, 1, 2, 2);
} }
/* SIMD load ops (unaligned) */ /* SIMD load ops (unaligned) */
@ -1220,3 +1220,5 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
__v128 = _mm256_extractf128_si256(__a, 1); __v128 = _mm256_extractf128_si256(__a, 1);
__builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128); __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
} }
#endif /* __AVXINTRIN_H */

View File

@ -25,10 +25,132 @@
#error this header is for x86 only #error this header is for x86 only
#endif #endif
/* Features in %ecx for level 1 */
#define bit_SSE3 0x00000001
#define bit_PCLMULQDQ 0x00000002
#define bit_DTES64 0x00000004
#define bit_MONITOR 0x00000008
#define bit_DSCPL 0x00000010
#define bit_VMX 0x00000020
#define bit_SMX 0x00000040
#define bit_EIST 0x00000080
#define bit_TM2 0x00000100
#define bit_SSSE3 0x00000200
#define bit_CNXTID 0x00000400
#define bit_FMA 0x00001000
#define bit_CMPXCHG16B 0x00002000
#define bit_xTPR 0x00004000
#define bit_PDCM 0x00008000
#define bit_PCID 0x00020000
#define bit_DCA 0x00040000
#define bit_SSE41 0x00080000
#define bit_SSE42 0x00100000
#define bit_x2APIC 0x00200000
#define bit_MOVBE 0x00400000
#define bit_POPCNT 0x00800000
#define bit_TSCDeadline 0x01000000
#define bit_AESNI 0x02000000
#define bit_XSAVE 0x04000000
#define bit_OSXSAVE 0x08000000
#define bit_AVX 0x10000000
#define bit_RDRAND 0x40000000
/* Features in %edx for level 1 */
#define bit_FPU 0x00000001
#define bit_VME 0x00000002
#define bit_DE 0x00000004
#define bit_PSE 0x00000008
#define bit_TSC 0x00000010
#define bit_MSR 0x00000020
#define bit_PAE 0x00000040
#define bit_MCE 0x00000080
#define bit_CX8 0x00000100
#define bit_APIC 0x00000200
#define bit_SEP 0x00000800
#define bit_MTRR 0x00001000
#define bit_PGE 0x00002000
#define bit_MCA 0x00004000
#define bit_CMOV 0x00008000
#define bit_PAT 0x00010000
#define bit_PSE36 0x00020000
#define bit_PSN 0x00040000
#define bit_CLFSH 0x00080000
#define bit_DS 0x00200000
#define bit_ACPI 0x00400000
#define bit_MMX 0x00800000
#define bit_FXSR 0x01000000
#define bit_SSE 0x02000000
#define bit_SSE2 0x04000000
#define bit_SS 0x08000000
#define bit_HTT 0x10000000
#define bit_TM 0x20000000
#define bit_PBE 0x80000000
/* Features in %ebx for level 7 sub-leaf 0 */
#define bit_FSGSBASE 0x00000001
#define bit_SMEP 0x00000080
#define bit_ENH_MOVSB 0x00000200
/* PIC on i386 uses %ebx, so preserve it. */
#if __i386__
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
__asm(" pushl %%ebx\n" \
" cpuid\n" \
" mov %%ebx,%1\n" \
" popl %%ebx" \
: "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__level))
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
__asm(" pushl %%ebx\n" \
" cpuid\n" \
" mov %%ebx,%1\n" \
" popl %%ebx" \
: "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__level), "2"(__count))
#else
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__level))
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__level), "2"(__count))
#endif
static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
unsigned int *__ebx, unsigned int *__ecx, unsigned int *__ebx, unsigned int *__ecx,
unsigned int *__edx) { unsigned int *__edx) {
__asm("cpuid" : "=a"(*__eax), "=b" (*__ebx), "=c"(*__ecx), "=d"(*__edx) __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx);
: "0"(__level));
return 1; return 1;
} }
static __inline int __get_cpuid_max (unsigned int __level, unsigned int *__sig)
{
unsigned int __eax, __ebx, __ecx, __edx;
#if __i386__
int __cpuid_supported;
__asm(" pushfl\n"
" popl %%eax\n"
" movl %%eax,%%ecx\n"
" xorl $0x00200000,%%eax\n"
" pushl %%eax\n"
" popfl\n"
" pushfl\n"
" popl %%eax\n"
" movl $0,%0\n"
" cmpl %%eax,%%ecx\n"
" je 1f\n"
" movl $1,%0\n"
"1:"
: "=r" (__cpuid_supported) : : "eax", "ecx");
if (!__cpuid_supported)
return 0;
#endif
__cpuid(__level, __eax, __ebx, __ecx, __edx);
if (__sig)
*__sig = __ebx;
return __eax;
}

View File

@ -245,13 +245,15 @@ _mm_cmple_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_sd(__m128d __a, __m128d __b) _mm_cmpgt_sd(__m128d __a, __m128d __b)
{ {
return (__m128d)__builtin_ia32_cmpsd(__b, __a, 1); __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
return (__m128d) { __c[0], __a[1] };
} }
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_sd(__m128d __a, __m128d __b) _mm_cmpge_sd(__m128d __a, __m128d __b)
{ {
return (__m128d)__builtin_ia32_cmpsd(__b, __a, 2); __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
return (__m128d) { __c[0], __a[1] };
} }
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
@ -287,13 +289,15 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b)
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_sd(__m128d __a, __m128d __b) _mm_cmpngt_sd(__m128d __a, __m128d __b)
{ {
return (__m128d)__builtin_ia32_cmpsd(__b, __a, 5); __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
return (__m128d) { __c[0], __a[1] };
} }
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_sd(__m128d __a, __m128d __b) _mm_cmpnge_sd(__m128d __a, __m128d __b)
{ {
return (__m128d)__builtin_ia32_cmpsd(__b, __a, 6); __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
return (__m128d) { __c[0], __a[1] };
} }
static __inline__ int __attribute__((__always_inline__, __nodebug__)) static __inline__ int __attribute__((__always_inline__, __nodebug__))
@ -822,7 +826,9 @@ _mm_xor_si128(__m128i __a, __m128i __b)
} }
#define _mm_slli_si128(a, count) __extension__ ({ \ #define _mm_slli_si128(a, count) __extension__ ({ \
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
__m128i __a = (a); \ __m128i __a = (a); \
_Pragma("clang diagnostic pop"); \
(__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); }) (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); })
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
@ -887,7 +893,9 @@ _mm_sra_epi32(__m128i __a, __m128i __count)
#define _mm_srli_si128(a, count) __extension__ ({ \ #define _mm_srli_si128(a, count) __extension__ ({ \
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
__m128i __a = (a); \ __m128i __a = (a); \
_Pragma("clang diagnostic pop"); \
(__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); }) (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); })
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
@ -1210,6 +1218,14 @@ _mm_stream_si32(int *__p, int __a)
__builtin_ia32_movnti(__p, __a); __builtin_ia32_movnti(__p, __a);
} }
#ifdef __x86_64__
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si64(long long *__p, long long __a)
{
__builtin_ia32_movnti64(__p, __a);
}
#endif
static __inline__ void __attribute__((__always_inline__, __nodebug__)) static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *__p) _mm_clflush(void const *__p)
{ {
@ -1250,7 +1266,7 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_extract_epi16(__m128i __a, int __imm) _mm_extract_epi16(__m128i __a, int __imm)
{ {
__v8hi __b = (__v8hi)__a; __v8hi __b = (__v8hi)__a;
return (unsigned short)__b[__imm]; return (unsigned short)__b[__imm & 7];
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
@ -1268,20 +1284,26 @@ _mm_movemask_epi8(__m128i __a)
} }
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \ #define _mm_shuffle_epi32(a, imm) __extension__ ({ \
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
__m128i __a = (a); \ __m128i __a = (a); \
_Pragma("clang diagnostic pop"); \
(__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \ (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \ (imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
__m128i __a = (a); \ __m128i __a = (a); \
_Pragma("clang diagnostic pop"); \
(__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
(imm) & 0x3, ((imm) & 0xc) >> 2, \ (imm) & 0x3, ((imm) & 0xc) >> 2, \
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
4, 5, 6, 7); }) 4, 5, 6, 7); })
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
__m128i __a = (a); \ __m128i __a = (a); \
_Pragma("clang diagnostic pop"); \
(__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
0, 1, 2, 3, \ 0, 1, 2, 3, \
4 + (((imm) & 0x03) >> 0), \ 4 + (((imm) & 0x03) >> 0), \
@ -1344,7 +1366,7 @@ _mm_movepi64_pi64(__m128i __a)
} }
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_movpi64_pi64(__m64 __a) _mm_movpi64_epi64(__m64 __a)
{ {
return (__m128i){ (long long)__a, 0 }; return (__m128i){ (long long)__a, 0 };
} }
@ -1374,8 +1396,10 @@ _mm_movemask_pd(__m128d __a)
} }
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \ #define _mm_shuffle_pd(a, b, i) __extension__ ({ \
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
__m128d __a = (a); \ __m128d __a = (a); \
__m128d __b = (b); \ __m128d __b = (b); \
_Pragma("clang diagnostic pop"); \
__builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); }) __builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); })
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))

View File

@ -1,6 +1,6 @@
/*===---- f16cintrin.h - F16C intrinsics ---------------------------------=== /*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
* *
* Permission is hereby granted, free of charge, to any person obtaining __a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights * in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

View File

@ -111,4 +111,8 @@ _xtest(void)
} }
#endif #endif
#ifdef __SHA__
#include <shaintrin.h>
#endif
#endif /* __IMMINTRIN_H */ #endif /* __IMMINTRIN_H */

View File

@ -87,8 +87,10 @@
#define CHAR_MAX __SCHAR_MAX__ #define CHAR_MAX __SCHAR_MAX__
#endif #endif
/* C99 5.2.4.2.1: Added long long. */ /* C99 5.2.4.2.1: Added long long.
#if __STDC_VERSION__ >= 199901 C++11 18.3.3.2: same contents as the Standard C Library header <limits.h>.
*/
#if __STDC_VERSION__ >= 199901 || __cplusplus >= 201103L
#undef LLONG_MIN #undef LLONG_MIN
#undef LLONG_MAX #undef LLONG_MAX

View File

@ -4,6 +4,16 @@ module _Builtin_intrinsics [system] {
header "altivec.h" header "altivec.h"
} }
explicit module arm {
requires arm
explicit module neon {
requires neon
header "arm_neon.h"
export *
}
}
explicit module intel { explicit module intel {
requires x86 requires x86
export * export *
@ -34,6 +44,7 @@ module _Builtin_intrinsics [system] {
explicit module sse { explicit module sse {
requires sse requires sse
export mmx export mmx
export * // note: for hackish <emmintrin.h> dependency
header "xmmintrin.h" header "xmmintrin.h"
} }

View File

@ -25,6 +25,9 @@
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead." #error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
#endif #endif
#ifndef __PRFCHWINTRIN_H
#define __PRFCHWINTRIN_H
#if defined(__PRFCHW__) || defined(__3dNOW__) #if defined(__PRFCHW__) || defined(__3dNOW__)
static __inline__ void __attribute__((__always_inline__, __nodebug__)) static __inline__ void __attribute__((__always_inline__, __nodebug__))
_m_prefetchw(void *__P) _m_prefetchw(void *__P)
@ -32,3 +35,5 @@ _m_prefetchw(void *__P)
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
} }
#endif #endif
#endif /* __PRFCHWINTRIN_H */

View File

@ -25,6 +25,9 @@
#error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead." #error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
#endif #endif
#ifndef __RDSEEDINTRIN_H
#define __RDSEEDINTRIN_H
#ifdef __RDSEED__ #ifdef __RDSEED__
static __inline__ int __attribute__((__always_inline__, __nodebug__)) static __inline__ int __attribute__((__always_inline__, __nodebug__))
_rdseed16_step(unsigned short *__p) _rdseed16_step(unsigned short *__p)
@ -46,3 +49,4 @@ _rdseed64_step(unsigned long long *__p)
} }
#endif #endif
#endif /* __RDSEED__ */ #endif /* __RDSEED__ */
#endif /* __RDSEEDINTRIN_H */

View File

@ -25,6 +25,9 @@
#error "Never use <rtmintrin.h> directly; include <immintrin.h> instead." #error "Never use <rtmintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef __RTMINTRIN_H
#define __RTMINTRIN_H
#define _XBEGIN_STARTED (~0u) #define _XBEGIN_STARTED (~0u)
#define _XABORT_EXPLICIT (1 << 0) #define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1) #define _XABORT_RETRY (1 << 1)
@ -47,3 +50,5 @@ _xend(void)
} }
#define _xabort(imm) __builtin_ia32_xabort((imm)) #define _xabort(imm) __builtin_ia32_xabort((imm))
#endif /* __RTMINTRIN_H */

View File

@ -0,0 +1,74 @@
/*===---- shaintrin.h - SHA intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __IMMINTRIN_H
#error "Never use <shaintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __SHAINTRIN_H
#define __SHAINTRIN_H
#if !defined (__SHA__)
# error "SHA instructions not enabled"
#endif
#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
__builtin_ia32_sha1rnds4((V1), (V2), (M)); })
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
{
return __builtin_ia32_sha1nexte(__X, __Y);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)
{
return __builtin_ia32_sha1msg1(__X, __Y);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)
{
return __builtin_ia32_sha1msg2(__X, __Y);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)
{
return __builtin_ia32_sha256rnds2(__X, __Y, __Z);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)
{
return __builtin_ia32_sha256msg1(__X, __Y);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)
{
return __builtin_ia32_sha256msg2(__X, __Y);
}
#endif /* __SHAINTRIN_H */

View File

@ -197,7 +197,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
#define _mm_extract_ps(X, N) (__extension__ \ #define _mm_extract_ps(X, N) (__extension__ \
({ union { int __i; float __f; } __t; \ ({ union { int __i; float __f; } __t; \
__v4sf __a = (__v4sf)(X); \ __v4sf __a = (__v4sf)(X); \
__t.__f = __a[N]; \ __t.__f = __a[(N) & 3]; \
__t.__i;})) __t.__i;}))
/* Miscellaneous insert and extract macros. */ /* Miscellaneous insert and extract macros. */
@ -215,14 +215,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/* Insert int into packed integer array at index. */ /* Insert int into packed integer array at index. */
#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ #define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
__a[(N)] = (I); \ __a[(N) & 15] = (I); \
__a;})) __a;}))
#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ #define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
__a[(N)] = (I); \ __a[(N) & 3] = (I); \
__a;})) __a;}))
#ifdef __x86_64__ #ifdef __x86_64__
#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ #define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
__a[(N)] = (I); \ __a[(N) & 1] = (I); \
__a;})) __a;}))
#endif /* __x86_64__ */ #endif /* __x86_64__ */
@ -230,12 +230,13 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
* as a zero extended value, so it is unsigned. * as a zero extended value, so it is unsigned.
*/ */
#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ #define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
(unsigned char)__a[(N)];})) (int)(unsigned char) \
__a[(N) & 15];}))
#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ #define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
(unsigned)__a[(N)];})) __a[(N) & 3];}))
#ifdef __x86_64__ #ifdef __x86_64__
#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ #define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
__a[(N)];})) __a[(N) & 1];}))
#endif /* __x86_64 */ #endif /* __x86_64 */
/* SSE4 128-bit Packed Integer Comparisons. */ /* SSE4 128-bit Packed Integer Comparisons. */

View File

@ -0,0 +1,158 @@
/*===---- tbmintrin.h - TBM intrinsics -------------------------------------===
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*===-----------------------------------------------------------------------===
*/
#ifndef __TBM__
#error "TBM instruction set is not enabled"
#endif
#ifndef __X86INTRIN_H
#error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __TBMINTRIN_H
#define __TBMINTRIN_H
#define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b)))
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__blcfill_u32(unsigned int a)
{
return a & (a + 1);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__blci_u32(unsigned int a)
{
return a | ~(a + 1);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__blcic_u32(unsigned int a)
{
return ~a & (a + 1);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__blcmsk_u32(unsigned int a)
{
return a ^ (a + 1);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__blcs_u32(unsigned int a)
{
return a | (a + 1);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__blsfill_u32(unsigned int a)
{
return a | (a - 1);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__blsic_u32(unsigned int a)
{
return ~a | (a - 1);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__t1mskc_u32(unsigned int a)
{
return ~a | (a + 1);
}
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__tzmsk_u32(unsigned int a)
{
return ~a & (a - 1);
}
#ifdef __x86_64__
#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b)))
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__blcfill_u64(unsigned long long a)
{
return a & (a + 1);
}
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__blci_u64(unsigned long long a)
{
return a | ~(a + 1);
}
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__blcic_u64(unsigned long long a)
{
return ~a & (a + 1);
}
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__blcmsk_u64(unsigned long long a)
{
return a ^ (a + 1);
}
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__blcs_u64(unsigned long long a)
{
return a | (a + 1);
}
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__blsfill_u64(unsigned long long a)
{
return a | (a - 1);
}
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__blsic_u64(unsigned long long a)
{
return ~a | (a - 1);
}
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__t1mskc_u64(unsigned long long a)
{
return ~a | (a + 1);
}
static __inline__ unsigned long long __attribute__((__always_inline__,
__nodebug__))
__tzmsk_u64(unsigned long long a)
{
return ~a & (a - 1);
}
#endif
#endif /* __TBMINTRIN_H */

View File

@ -1340,15 +1340,15 @@ static long double _Complex
// creal // creal
static float _Complex static float
_TG_ATTRS _TG_ATTRS
__tg_creal(float __x) {return __x;} __tg_creal(float __x) {return __x;}
static double _Complex static double
_TG_ATTRS _TG_ATTRS
__tg_creal(double __x) {return __x;} __tg_creal(double __x) {return __x;}
static long double _Complex static long double
_TG_ATTRS _TG_ATTRS
__tg_creal(long double __x) {return __x;} __tg_creal(long double __x) {return __x;}

View File

@ -27,8 +27,8 @@
#define __CLANG_UNWIND_H #define __CLANG_UNWIND_H
#if __has_include_next(<unwind.h>) #if __has_include_next(<unwind.h>)
/* Darwin and libunwind provide an unwind.h. If that's available, use /* Darwin (from 11.x on) and libunwind provide an unwind.h. If that's available,
* it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
* so define that around the include.*/ * so define that around the include.*/
# ifndef _GNU_SOURCE # ifndef _GNU_SOURCE
# define _SHOULD_UNDEFINE_GNU_SOURCE # define _SHOULD_UNDEFINE_GNU_SOURCE
@ -66,7 +66,17 @@ extern "C" {
#pragma GCC visibility push(default) #pragma GCC visibility push(default)
#endif #endif
typedef uintptr_t _Unwind_Word;
typedef intptr_t _Unwind_Sword;
typedef uintptr_t _Unwind_Ptr;
typedef uintptr_t _Unwind_Internal_Ptr;
typedef uint64_t _Unwind_Exception_Class;
typedef intptr_t _sleb128_t;
typedef uintptr_t _uleb128_t;
struct _Unwind_Context; struct _Unwind_Context;
struct _Unwind_Exception;
typedef enum { typedef enum {
_URC_NO_REASON = 0, _URC_NO_REASON = 0,
_URC_FOREIGN_EXCEPTION_CAUGHT = 1, _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
@ -81,8 +91,43 @@ typedef enum {
_URC_CONTINUE_UNWIND = 8 _URC_CONTINUE_UNWIND = 8
} _Unwind_Reason_Code; } _Unwind_Reason_Code;
typedef enum {
_UA_SEARCH_PHASE = 1,
_UA_CLEANUP_PHASE = 2,
#ifdef __arm__ _UA_HANDLER_FRAME = 4,
_UA_FORCE_UNWIND = 8,
_UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */
} _Unwind_Action;
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
struct _Unwind_Exception *);
struct _Unwind_Exception {
_Unwind_Exception_Class exception_class;
_Unwind_Exception_Cleanup_Fn exception_cleanup;
_Unwind_Word private_1;
_Unwind_Word private_2;
/* The Itanium ABI requires that _Unwind_Exception objects are "double-word
* aligned". GCC has interpreted this to mean "use the maximum useful
* alignment for the target"; so do we. */
} __attribute__((__aligned__));
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
_Unwind_Exception_Class,
struct _Unwind_Exception *,
struct _Unwind_Context *,
void *);
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
struct _Unwind_Context *);
typedef _Unwind_Personality_Fn __personality_routine;
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
void *);
#if defined(__arm__) && !defined(__APPLE__)
typedef enum { typedef enum {
_UVRSC_CORE = 0, /* integer register */ _UVRSC_CORE = 0, /* integer register */
@ -111,14 +156,116 @@ _Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
_Unwind_VRS_DataRepresentation __representation, _Unwind_VRS_DataRepresentation __representation,
void *__valuep); void *__valuep);
#else _Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,
_Unwind_VRS_RegClass __regclass,
uint32_t __regno,
_Unwind_VRS_DataRepresentation __representation,
void *__valuep);
uintptr_t _Unwind_GetIP(struct _Unwind_Context* __context); static __inline__
_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {
_Unwind_Word __value;
_Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);
return __value;
}
static __inline__
void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,
_Unwind_Word __value) {
_Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);
}
static __inline__
_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {
_Unwind_Word __ip = _Unwind_GetGR(__context, 15);
return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */
}
static __inline__
void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {
_Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;
_Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);
}
#else
_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);
void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);
_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);
void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);
#endif
_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);
_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);
void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);
_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
/* DWARF EH functions; currently not available on Darwin/ARM */
#if !defined(__APPLE__) || !defined(__arm__)
_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
_Unwind_Stop_Fn, void *);
void _Unwind_DeleteException(struct _Unwind_Exception *);
void _Unwind_Resume(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
#endif
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
/* setjmp(3)/longjmp(3) stuff */
typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;
void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);
void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *,
_Unwind_Stop_Fn, void *);
void _Unwind_SjLj_Resume(struct _Unwind_Exception *);
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *);
void *_Unwind_FindEnclosingFunction(void *);
#ifdef __APPLE__
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)
__attribute__((unavailable));
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)
__attribute__((unavailable));
/* Darwin-specific functions */
void __register_frame(const void *);
void __deregister_frame(const void *);
struct dwarf_eh_bases {
uintptr_t tbase;
uintptr_t dbase;
uintptr_t func;
};
void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);
void __register_frame_info_bases(const void *, void *, void *, void *)
__attribute__((unavailable));
void __register_frame_info(const void *, void *) __attribute__((unavailable));
void __register_frame_info_table_bases(const void *, void*, void *, void *)
__attribute__((unavailable));
void __register_frame_info_table(const void *, void *)
__attribute__((unavailable));
void __register_frame_table(const void *) __attribute__((unavailable));
void __deregister_frame_info(const void *) __attribute__((unavailable));
void __deregister_frame_info_bases(const void *)__attribute__((unavailable));
#else
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);
#endif #endif
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context*, void*);
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void*);
#ifndef HIDE_EXPORTS #ifndef HIDE_EXPORTS
#pragma GCC visibility pop #pragma GCC visibility pop

View File

@ -66,6 +66,10 @@
#include <xopintrin.h> #include <xopintrin.h>
#endif #endif
#ifdef __TBM__
#include <tbmintrin.h>
#endif
#ifdef __F16C__ #ifdef __F16C__
#include <f16cintrin.h> #include <f16cintrin.h>
#endif #endif

View File

@ -218,7 +218,9 @@ _mm_cmple_ps(__m128 __a, __m128 __b)
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_ss(__m128 __a, __m128 __b) _mm_cmpgt_ss(__m128 __a, __m128 __b)
{ {
return (__m128)__builtin_ia32_cmpss(__b, __a, 1); return (__m128)__builtin_shufflevector(__a,
__builtin_ia32_cmpss(__b, __a, 1),
4, 1, 2, 3);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@ -230,7 +232,9 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b)
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_ss(__m128 __a, __m128 __b) _mm_cmpge_ss(__m128 __a, __m128 __b)
{ {
return (__m128)__builtin_ia32_cmpss(__b, __a, 2); return (__m128)__builtin_shufflevector(__a,
__builtin_ia32_cmpss(__b, __a, 2),
4, 1, 2, 3);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@ -278,7 +282,9 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b)
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_ss(__m128 __a, __m128 __b) _mm_cmpngt_ss(__m128 __a, __m128 __b)
{ {
return (__m128)__builtin_ia32_cmpss(__b, __a, 5); return (__m128)__builtin_shufflevector(__a,
__builtin_ia32_cmpss(__b, __a, 5),
4, 1, 2, 3);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@ -290,7 +296,9 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b)
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_ss(__m128 __a, __m128 __b) _mm_cmpnge_ss(__m128 __a, __m128 __b)
{ {
return (__m128)__builtin_ia32_cmpss(__b, __a, 6); return (__m128)__builtin_shufflevector(__a,
__builtin_ia32_cmpss(__b, __a, 6),
4, 1, 2, 3);
} }
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
@ -983,12 +991,10 @@ do { \
#define _m_ _mm_ #define _m_ _mm_
#define _m_ _mm_ #define _m_ _mm_
#if !__has_feature(modules)
/* Ugly hack for backwards-compatibility (compatible with gcc) */ /* Ugly hack for backwards-compatibility (compatible with gcc) */
#ifdef __SSE2__ #ifdef __SSE2__
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
#endif
#endif /* __SSE__ */ #endif /* __SSE__ */

View File

@ -342,6 +342,399 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
__m128i __B = (B); \ __m128i __B = (B); \
(__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); })
#define _MM_PCOMCTRL_LT 0
#define _MM_PCOMCTRL_LE 1
#define _MM_PCOMCTRL_GT 2
#define _MM_PCOMCTRL_GE 3
#define _MM_PCOMCTRL_EQ 4
#define _MM_PCOMCTRL_NEQ 5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE 7
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epu8(__m128i __A, __m128i __B)
{
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epu16(__m128i __A, __m128i __B)
{
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epu32(__m128i __A, __m128i __B)
{
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epu64(__m128i __A, __m128i __B)
{
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epi8(__m128i __A, __m128i __B)
{
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epi16(__m128i __A, __m128i __B)
{
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epi32(__m128i __A, __m128i __B)
{
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comlt_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comle_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comgt_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comge_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comeq_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comneq_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comfalse_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
}
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_comtrue_epi64(__m128i __A, __m128i __B)
{
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
}
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ #define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
__m128d __X = (X); \ __m128d __X = (X); \
__m128d __Y = (Y); \ __m128d __Y = (Y); \