Updating to clang 3.4 builtin headers
This commit is contained in:
parent
2ebea627d9
commit
f30857df22
784
python/clang_includes/Intrin.h
Normal file
784
python/clang_includes/Intrin.h
Normal file
@ -0,0 +1,784 @@
|
||||
/* ===-------- Intrin.h ---------------------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/* Only include this if we're compiling for the windows platform. */
|
||||
#ifndef _MSC_VER
|
||||
#include_next <Intrin.h>
|
||||
#else
|
||||
|
||||
#ifndef __INTRIN_H
|
||||
#define __INTRIN_H
|
||||
|
||||
/* First include the standard intrinsics. */
|
||||
#include <x86intrin.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* And the random ones that aren't in those files. */
|
||||
__m64 _m_from_float(float);
|
||||
__m64 _m_from_int(int _l);
|
||||
void _m_prefetch(void *);
|
||||
float _m_to_float(__m64);
|
||||
int _m_to_int(__m64 _M);
|
||||
|
||||
/* Other assorted instruction intrinsics. */
|
||||
void __addfsbyte(unsigned long, unsigned char);
|
||||
void __addfsdword(unsigned long, unsigned long);
|
||||
void __addfsword(unsigned long, unsigned short);
|
||||
void __code_seg(const char *);
|
||||
void __cpuid(int[4], int);
|
||||
void __cpuidex(int[4], int, int);
|
||||
void __debugbreak(void);
|
||||
__int64 __emul(int, int);
|
||||
unsigned __int64 __emulu(unsigned int, unsigned int);
|
||||
void __cdecl __fastfail(unsigned int);
|
||||
unsigned int __getcallerseflags(void);
|
||||
void __halt(void);
|
||||
unsigned char __inbyte(unsigned short);
|
||||
void __inbytestring(unsigned short, unsigned char *, unsigned long);
|
||||
void __incfsbyte(unsigned long);
|
||||
void __incfsdword(unsigned long);
|
||||
void __incfsword(unsigned long);
|
||||
unsigned long __indword(unsigned short);
|
||||
void __indwordstring(unsigned short, unsigned long *, unsigned long);
|
||||
void __int2c(void);
|
||||
void __invlpg(void *);
|
||||
unsigned short __inword(unsigned short);
|
||||
void __inwordstring(unsigned short, unsigned short *, unsigned long);
|
||||
void __lidt(void *);
|
||||
unsigned __int64 __ll_lshift(unsigned __int64, int);
|
||||
__int64 __ll_rshift(__int64, int);
|
||||
void __llwpcb(void *);
|
||||
unsigned char __lwpins32(unsigned int, unsigned int, unsigned int);
|
||||
void __lwpval32(unsigned int, unsigned int, unsigned int);
|
||||
unsigned int __lzcnt(unsigned int);
|
||||
unsigned short __lzcnt16(unsigned short);
|
||||
void __movsb(unsigned char *, unsigned char const *, size_t);
|
||||
void __movsd(unsigned long *, unsigned long const *, size_t);
|
||||
void __movsw(unsigned short *, unsigned short const *, size_t);
|
||||
void __nop(void);
|
||||
void __nvreg_restore_fence(void);
|
||||
void __nvreg_save_fence(void);
|
||||
void __outbyte(unsigned short, unsigned char);
|
||||
void __outbytestring(unsigned short, unsigned char *, unsigned long);
|
||||
void __outdword(unsigned short, unsigned long);
|
||||
void __outdwordstring(unsigned short, unsigned long *, unsigned long);
|
||||
void __outword(unsigned short, unsigned short);
|
||||
void __outwordstring(unsigned short, unsigned short *, unsigned long);
|
||||
static __inline__
|
||||
unsigned int __popcnt(unsigned int);
|
||||
static __inline__
|
||||
unsigned short __popcnt16(unsigned short);
|
||||
unsigned __int64 __rdtsc(void);
|
||||
unsigned __int64 __rdtscp(unsigned int *);
|
||||
unsigned long __readcr0(void);
|
||||
unsigned long __readcr2(void);
|
||||
unsigned long __readcr3(void);
|
||||
unsigned long __readcr5(void);
|
||||
unsigned long __readcr8(void);
|
||||
unsigned int __readdr(unsigned int);
|
||||
unsigned int __readeflags(void);
|
||||
unsigned char __readfsbyte(unsigned long);
|
||||
unsigned long __readfsdword(unsigned long);
|
||||
unsigned __int64 __readfsqword(unsigned long);
|
||||
unsigned short __readfsword(unsigned long);
|
||||
unsigned __int64 __readmsr(unsigned long);
|
||||
unsigned __int64 __readpmc(unsigned long);
|
||||
unsigned long __segmentlimit(unsigned long);
|
||||
void __sidt(void *);
|
||||
void *__slwpcb(void);
|
||||
void __stosb(unsigned char *, unsigned char, size_t);
|
||||
void __stosd(unsigned long *, unsigned long, size_t);
|
||||
void __stosw(unsigned short *, unsigned short, size_t);
|
||||
void __svm_clgi(void);
|
||||
void __svm_invlpga(void *, int);
|
||||
void __svm_skinit(int);
|
||||
void __svm_stgi(void);
|
||||
void __svm_vmload(size_t);
|
||||
void __svm_vmrun(size_t);
|
||||
void __svm_vmsave(size_t);
|
||||
void __ud2(void);
|
||||
unsigned __int64 __ull_rshift(unsigned __int64, int);
|
||||
void __vmx_off(void);
|
||||
void __vmx_vmptrst(unsigned __int64 *);
|
||||
void __wbinvd(void);
|
||||
void __writecr0(unsigned int);
|
||||
void __writecr3(unsigned int);
|
||||
void __writecr4(unsigned int);
|
||||
void __writecr8(unsigned int);
|
||||
void __writedr(unsigned int, unsigned int);
|
||||
void __writeeflags(unsigned int);
|
||||
void __writefsbyte(unsigned long, unsigned char);
|
||||
void __writefsdword(unsigned long, unsigned long);
|
||||
void __writefsqword(unsigned long, unsigned __int64);
|
||||
void __writefsword(unsigned long, unsigned short);
|
||||
void __writemsr(unsigned long, unsigned __int64);
|
||||
static __inline__
|
||||
void *_AddressOfReturnAddress(void);
|
||||
unsigned int _andn_u32(unsigned int, unsigned int);
|
||||
unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
|
||||
unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
|
||||
unsigned int _bextri_u32(unsigned int, unsigned int);
|
||||
static __inline__
|
||||
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
|
||||
static __inline__
|
||||
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
|
||||
static __inline__
|
||||
unsigned char _bittest(long const *, long);
|
||||
static __inline__
|
||||
unsigned char _bittestandcomplement(long *, long);
|
||||
static __inline__
|
||||
unsigned char _bittestandreset(long *, long);
|
||||
static __inline__
|
||||
unsigned char _bittestandset(long *, long);
|
||||
unsigned int _blcfill_u32(unsigned int);
|
||||
unsigned int _blci_u32(unsigned int);
|
||||
unsigned int _blcic_u32(unsigned int);
|
||||
unsigned int _blcmsk_u32(unsigned int);
|
||||
unsigned int _blcs_u32(unsigned int);
|
||||
unsigned int _blsfill_u32(unsigned int);
|
||||
unsigned int _blsi_u32(unsigned int);
|
||||
unsigned int _blsic_u32(unsigned int);
|
||||
unsigned int _blsmsk_u32(unsigned int);
|
||||
unsigned int _blsmsk_u32(unsigned int);
|
||||
unsigned int _blsr_u32(unsigned int);
|
||||
unsigned int _blsr_u32(unsigned int);
|
||||
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
|
||||
unsigned long __cdecl _byteswap_ulong(unsigned long);
|
||||
unsigned short __cdecl _byteswap_ushort(unsigned short);
|
||||
unsigned _bzhi_u32(unsigned int, unsigned int);
|
||||
void __cdecl _disable(void);
|
||||
void __cdecl _enable(void);
|
||||
void __cdecl _fxrstor(void const *);
|
||||
void __cdecl _fxsave(void *);
|
||||
long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);
|
||||
static __inline__
|
||||
long _InterlockedAnd(long volatile *_Value, long _Mask);
|
||||
static __inline__
|
||||
short _InterlockedAnd16(short volatile *_Value, short _Mask);
|
||||
static __inline__
|
||||
char _InterlockedAnd8(char volatile *_Value, char _Mask);
|
||||
unsigned char _interlockedbittestandreset(long volatile *, long);
|
||||
unsigned char _interlockedbittestandset(long volatile *, long);
|
||||
static __inline__
|
||||
long __cdecl _InterlockedCompareExchange(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long);
|
||||
long _InterlockedCompareExchange_HLERelease(long volatile *, long, long);
|
||||
static __inline__
|
||||
short _InterlockedCompareExchange16(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
static __inline__
|
||||
__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
__int64 _InterlockedcompareExchange64_HLEAcquire(__int64 volatile *, __int64,
|
||||
__int64);
|
||||
__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
|
||||
__int64);
|
||||
static __inline__
|
||||
char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange,
|
||||
char _Comparand);
|
||||
void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,
|
||||
void *);
|
||||
void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,
|
||||
void *);
|
||||
static __inline__
|
||||
long __cdecl _InterlockedDecrement(long volatile *_Addend);
|
||||
static __inline__
|
||||
short _InterlockedDecrement16(short volatile *_Addend);
|
||||
static __inline__
|
||||
long __cdecl _InterlockedExchange(long volatile *_Target, long _Value);
|
||||
static __inline__
|
||||
short _InterlockedExchange16(short volatile *_Target, short _Value);
|
||||
static __inline__
|
||||
char _InterlockedExchange8(char volatile *_Target, char _Value);
|
||||
static __inline__
|
||||
long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value);
|
||||
long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);
|
||||
long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
|
||||
static __inline__
|
||||
char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value);
|
||||
static __inline__
|
||||
long __cdecl _InterlockedIncrement(long volatile *_Addend);
|
||||
static __inline__
|
||||
short _InterlockedIncrement16(short volatile *_Addend);
|
||||
static __inline__
|
||||
long _InterlockedOr(long volatile *_Value, long _Mask);
|
||||
static __inline__
|
||||
short _InterlockedOr16(short volatile *_Value, short _Mask);
|
||||
static __inline__
|
||||
char _InterlockedOr8(char volatile *_Value, char _Mask);
|
||||
static __inline__
|
||||
long _InterlockedXor(long volatile *_Value, long _Mask);
|
||||
static __inline__
|
||||
short _InterlockedXor16(short volatile *_Value, short _Mask);
|
||||
static __inline__
|
||||
char _InterlockedXor8(char volatile *_Value, char _Mask);
|
||||
void __cdecl _invpcid(unsigned int, void *);
|
||||
static __inline__
|
||||
unsigned long __cdecl _lrotl(unsigned long, int);
|
||||
static __inline__
|
||||
unsigned long __cdecl _lrotr(unsigned long, int);
|
||||
static __inline__
|
||||
unsigned int _lzcnt_u32(unsigned int);
|
||||
static __inline__
|
||||
void _ReadBarrier(void);
|
||||
static __inline__
|
||||
void _ReadWriteBarrier(void);
|
||||
static __inline__
|
||||
void *_ReturnAddress(void);
|
||||
unsigned int _rorx_u32(unsigned int, const unsigned int);
|
||||
int __cdecl _rdrand16_step(unsigned short *);
|
||||
int __cdecl _rdrand32_step(unsigned int *);
|
||||
static __inline__
|
||||
unsigned int __cdecl _rotl(unsigned int _Value, int _Shift);
|
||||
static __inline__
|
||||
unsigned short _rotl16(unsigned short _Value, unsigned char _Shift);
|
||||
static __inline__
|
||||
unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift);
|
||||
static __inline__
|
||||
unsigned char _rotl8(unsigned char _Value, unsigned char _Shift);
|
||||
static __inline__
|
||||
unsigned int __cdecl _rotr(unsigned int _Value, int _Shift);
|
||||
static __inline__
|
||||
unsigned short _rotr16(unsigned short _Value, unsigned char _Shift);
|
||||
static __inline__
|
||||
unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift);
|
||||
static __inline__
|
||||
unsigned char _rotr8(unsigned char _Value, unsigned char _Shift);
|
||||
int _sarx_i32(int, unsigned int);
|
||||
|
||||
/* FIXME: Need definition for jmp_buf.
|
||||
int __cdecl _setjmp(jmp_buf); */
|
||||
|
||||
unsigned int _shlx_u32(unsigned int, unsigned int);
|
||||
unsigned int _shrx_u32(unsigned int, unsigned int);
|
||||
void _Store_HLERelease(long volatile *, long);
|
||||
void _Store64_HLERelease(__int64 volatile *, __int64);
|
||||
void _StorePointer_HLERelease(void *volatile *, void *);
|
||||
unsigned int _t1mskc_u32(unsigned int);
|
||||
unsigned int _tzcnt_u32(unsigned int);
|
||||
unsigned int _tzcnt_u32(unsigned int);
|
||||
unsigned int _tzmsk_u32(unsigned int);
|
||||
static __inline__
|
||||
void _WriteBarrier(void);
|
||||
void _xabort(const unsigned int imm);
|
||||
unsigned __int32 xbegin(void);
|
||||
void _xend(void);
|
||||
unsigned __int64 __cdecl _xgetbv(unsigned int);
|
||||
void __cdecl _xrstor(void const *, unsigned __int64);
|
||||
void __cdecl _xsave(void *, unsigned __int64);
|
||||
void __cdecl _xsaveopt(void *, unsigned __int64);
|
||||
void __cdecl _xsetbv(unsigned int, unsigned __int64);
|
||||
unsigned char _xtest(void);
|
||||
|
||||
/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
|
||||
#ifdef __x86_64__
|
||||
void __addgsbyte(unsigned long, unsigned char);
|
||||
void __addgsdword(unsigned long, unsigned long);
|
||||
void __addgsqword(unsigned long, unsigned __int64);
|
||||
void __addgsword(unsigned long, unsigned short);
|
||||
void __faststorefence(void);
|
||||
void __incgsbyte(unsigned long);
|
||||
void __incgsdword(unsigned long);
|
||||
void __incgsqword(unsigned long);
|
||||
void __incgsword(unsigned long);
|
||||
unsigned __int64 __popcnt64(unsigned __int64);
|
||||
unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
|
||||
unsigned __int64 _HighPart,
|
||||
unsigned char _Shift);
|
||||
unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
|
||||
unsigned __int64 _HighPart,
|
||||
unsigned char _Shift);
|
||||
void __stosq(unsigned __int64 *, unsigned __int64, size_t);
|
||||
unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64);
|
||||
unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int);
|
||||
unsigned __int64 _bextri_u64(unsigned __int64, unsigned int);
|
||||
static __inline__
|
||||
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
static __inline__
|
||||
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
static __inline__
|
||||
unsigned char _bittest64(__int64 const *, __int64);
|
||||
static __inline__
|
||||
unsigned char _bittestandcomplement64(__int64 *, __int64);
|
||||
static __inline__
|
||||
unsigned char _bittestandreset64(__int64 *, __int64);
|
||||
static __inline__
|
||||
unsigned char _bittestandset64(__int64 *, __int64);
|
||||
unsigned __int64 _blcfill_u64(unsigned __int64);
|
||||
unsigned __int64 _blci_u64(unsigned __int64);
|
||||
unsigned __int64 _blcic_u64(unsigned __int64);
|
||||
unsigned __int64 _blcmsk_u64(unsigned __int64);
|
||||
unsigned __int64 _blcs_u64(unsigned __int64);
|
||||
unsigned __int64 _blsfill_u64(unsigned __int64);
|
||||
unsigned __int64 _blsi_u64(unsigned __int64);
|
||||
unsigned __int64 _blsic_u64(unsigned __int64);
|
||||
unsigned __int64 _blmsk_u64(unsigned __int64);
|
||||
unsigned __int64 _blsr_u64(unsigned __int64);
|
||||
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
|
||||
unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int);
|
||||
void __cdecl _fxrstor64(void const *);
|
||||
void __cdecl _fxsave64(void *);
|
||||
long _InterlockedAnd_np(long volatile *_Value, long _Mask);
|
||||
short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
|
||||
__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
|
||||
char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
|
||||
unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
|
||||
unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
|
||||
long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
|
||||
long _Comparand);
|
||||
unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_CompareandResult);
|
||||
unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
short _InterlockedCompareExchange16_np(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,
|
||||
void *_Exchange, void *_Comparand);
|
||||
long _InterlockedOr_np(long volatile *_Value, long _Mask);
|
||||
short _InterlockedOr16_np(short volatile *_Value, short _Mask);
|
||||
__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);
|
||||
char _InterlockedOr8_np(char volatile *_Value, char _Mask);
|
||||
long _InterlockedXor_np(long volatile *_Value, long _Mask);
|
||||
short _InterlockedXor16_np(short volatile *_Value, short _Mask);
|
||||
__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);
|
||||
char _InterlockedXor8_np(char volatile *_Value, char _Mask);
|
||||
unsigned __int64 _lzcnt_u64(unsigned __int64);
|
||||
__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand,
|
||||
__int64 *_HighProduct);
|
||||
unsigned int __cdecl _readfsbase_u32(void);
|
||||
unsigned __int64 __cdecl _readfsbase_u64(void);
|
||||
unsigned int __cdecl _readgsbase_u32(void);
|
||||
unsigned __int64 __cdecl _readgsbase_u64(void);
|
||||
unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);
|
||||
unsigned __int64 _tzcnt_u64(unsigned __int64);
|
||||
unsigned __int64 _tzmsk_u64(unsigned __int64);
|
||||
unsigned __int64 _umul128(unsigned __int64 _Multiplier,
|
||||
unsigned __int64 _Multiplicand,
|
||||
unsigned __int64 *_HighProduct);
|
||||
void __cdecl _writefsbase_u32(unsigned int);
|
||||
void _cdecl _writefsbase_u64(unsigned __int64);
|
||||
void __cdecl _writegsbase_u32(unsigned int);
|
||||
void __cdecl _writegsbase_u64(unsigned __int64);
|
||||
void __cdecl _xrstor64(void const *, unsigned __int64);
|
||||
void __cdecl _xsave64(void *, unsigned __int64);
|
||||
void __cdecl _xsaveopt64(void *, unsigned __int64);
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Bit Twiddling
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_rotl8(unsigned char _Value, unsigned char _Shift) {
|
||||
_Shift &= 0x7;
|
||||
return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_rotr8(unsigned char _Value, unsigned char _Shift) {
|
||||
_Shift &= 0x7;
|
||||
return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value;
|
||||
}
|
||||
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
|
||||
_rotl16(unsigned short _Value, unsigned char _Shift) {
|
||||
_Shift &= 0xf;
|
||||
return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value;
|
||||
}
|
||||
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
|
||||
_rotr16(unsigned short _Value, unsigned char _Shift) {
|
||||
_Shift &= 0xf;
|
||||
return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value;
|
||||
}
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
_rotl(unsigned int _Value, int _Shift) {
|
||||
_Shift &= 0x1f;
|
||||
return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
|
||||
}
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
_rotr(unsigned int _Value, int _Shift) {
|
||||
_Shift &= 0x1f;
|
||||
return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
|
||||
}
|
||||
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
|
||||
_lrotl(unsigned long _Value, int _Shift) {
|
||||
_Shift &= 0x1f;
|
||||
return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value;
|
||||
}
|
||||
static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
|
||||
_lrotr(unsigned long _Value, int _Shift) {
|
||||
_Shift &= 0x1f;
|
||||
return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
|
||||
}
|
||||
static
|
||||
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_rotl64(unsigned __int64 _Value, int _Shift) {
|
||||
_Shift &= 0x3f;
|
||||
return _Shift ? (_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value;
|
||||
}
|
||||
static
|
||||
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_rotr64(unsigned __int64 _Value, int _Shift) {
|
||||
_Shift &= 0x3f;
|
||||
return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Bit Counting and Testing
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_BitScanForward(unsigned long *_Index, unsigned long _Mask) {
|
||||
if (!_Mask)
|
||||
return 0;
|
||||
*_Index = __builtin_ctzl(_Mask);
|
||||
return 1;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_BitScanReverse(unsigned long *_Index, unsigned long _Mask) {
|
||||
if (!_Mask)
|
||||
return 0;
|
||||
*_Index = 31 - __builtin_clzl(_Mask);
|
||||
return 1;
|
||||
}
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
_lzcnt_u32(unsigned int a) {
|
||||
if (!a)
|
||||
return 32;
|
||||
return __builtin_clzl(a);
|
||||
}
|
||||
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
|
||||
__popcnt16(unsigned short value) {
|
||||
return __builtin_popcount((int)value);
|
||||
}
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__popcnt(unsigned int value) {
|
||||
return __builtin_popcount(value);
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_bittest(long const *a, long b) {
|
||||
return (*a >> b) & 1;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_bittestandcomplement(long *a, long b) {
|
||||
unsigned char x = (*a >> b) & 1;
|
||||
*a = *a ^ (1 << b);
|
||||
return x;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_bittestandreset(long *a, long b) {
|
||||
unsigned char x = (*a >> b) & 1;
|
||||
*a = *a & ~(1 << b);
|
||||
return x;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_bittestandset(long *a, long b) {
|
||||
unsigned char x = (*a >> b) & 1;
|
||||
*a = *a | (1 << b);
|
||||
return x;
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) {
|
||||
if (!_Mask)
|
||||
return 0;
|
||||
*_Index = __builtin_ctzll(_Mask);
|
||||
return 1;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
|
||||
if (!_Mask)
|
||||
return 0;
|
||||
*_Index = 63 - __builtin_clzll(_Mask);
|
||||
return 1;
|
||||
}
|
||||
static
|
||||
__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_lzcnt_u64(unsigned __int64 a) {
|
||||
if (!a)
|
||||
return 64;
|
||||
return __builtin_clzll(a);
|
||||
}
|
||||
static __inline__
|
||||
unsigned __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
__popcnt64(unsigned __int64 value) {
|
||||
return __builtin_popcountll(value);
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_bittest64(__int64 const *a, __int64 b) {
|
||||
return (*a >> b) & 1;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_bittestandcomplement64(__int64 *a, __int64 b) {
|
||||
unsigned char x = (*a >> b) & 1;
|
||||
*a = *a ^ (1ll << b);
|
||||
return x;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_bittestandreset64(__int64 *a, __int64 b) {
|
||||
unsigned char x = (*a >> b) & 1;
|
||||
*a = *a & ~(1ll << b);
|
||||
return x;
|
||||
}
|
||||
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
|
||||
_bittestandset64(__int64 *a, __int64 b) {
|
||||
unsigned char x = (*a >> b) & 1;
|
||||
*a = *a | (1ll << b);
|
||||
return x;
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange Add
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchangeAdd8(char volatile *_Addend, char _Value) {
|
||||
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
|
||||
}
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) {
|
||||
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchangeAdd(long volatile *_Addend, long _Value) {
|
||||
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
|
||||
return __atomic_add_fetch(_Addend, _Value, 0) - _Value;
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange Sub
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchangeSub8(char volatile *_Subend, char _Value) {
|
||||
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
|
||||
}
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchangeSub16(short volatile *_Subend, short _Value) {
|
||||
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchangeSub(long volatile *_Subend, long _Value) {
|
||||
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
|
||||
return __atomic_sub_fetch(_Subend, _Value, 0) + _Value;
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Increment
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedIncrement16(char volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, 0);
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedIncrement(long volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, 0);
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedIncrement64(__int64 volatile *_Value) {
|
||||
return __atomic_add_fetch(_Value, 1, 0);
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Decrement
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedDecrement16(char volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, 0);
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedDecrement(long volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, 0);
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedDecrement64(__int64 volatile *_Value) {
|
||||
return __atomic_sub_fetch(_Value, 1, 0);
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked And
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedAnd8(char volatile *_Value, char _Mask) {
|
||||
return __atomic_and_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedAnd16(short volatile *_Value, short _Mask) {
|
||||
return __atomic_and_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedAnd(long volatile *_Value, long _Mask) {
|
||||
return __atomic_and_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_and_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Or
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedOr8(char volatile *_Value, char _Mask) {
|
||||
return __atomic_or_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedOr16(short volatile *_Value, short _Mask) {
|
||||
return __atomic_or_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedOr(long volatile *_Value, long _Mask) {
|
||||
return __atomic_or_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_or_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Xor
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedXor8(char volatile *_Value, char _Mask) {
|
||||
return __atomic_xor_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedXor16(short volatile *_Value, short _Mask) {
|
||||
return __atomic_xor_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedXor(long volatile *_Value, long _Mask) {
|
||||
return __atomic_xor_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) {
|
||||
return __atomic_xor_fetch(_Value, _Mask, 0);
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchange8(char volatile *_Target, char _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, 0);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchange16(short volatile *_Target, short _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, 0);
|
||||
return _Value;
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchange(long volatile *_Target, long _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, 0);
|
||||
return _Value;
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) {
|
||||
__atomic_exchange(_Target, &_Value, &_Value, 0);
|
||||
return _Value;
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Compare Exchange
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ char __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedCompareExchange8(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ short __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedCompareExchange16(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
|
||||
return _Comparand;
|
||||
}
|
||||
static __inline__ long __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedCompareExchange(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
|
||||
return _Comparand;
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
|
||||
_InterlockedCompareExchange64(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand) {
|
||||
__atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0);
|
||||
return _Comparand;
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Barriers
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
|
||||
_ReadWriteBarrier(void) {
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
}
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
|
||||
_ReadBarrier(void) {
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
}
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
|
||||
_WriteBarrier(void) {
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
}
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Misc
|
||||
\*----------------------------------------------------------------------------*/
|
||||
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
|
||||
_AddressOfReturnAddress(void) {
|
||||
return (void*)((char*)__builtin_frame_address(0) + sizeof(void*));
|
||||
}
|
||||
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
|
||||
_ReturnAddress(void) {
|
||||
return __builtin_return_address(0);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __INTRIN_H */
|
||||
#endif /* _MSC_VER */
|
@ -25,6 +25,9 @@
|
||||
#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX2INTRIN_H
|
||||
#define __AVX2INTRIN_H
|
||||
|
||||
/* SSE4 Multiple Packed Sums of Absolute Difference. */
|
||||
#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M))
|
||||
|
||||
@ -750,9 +753,9 @@ _mm256_broadcastsd_pd(__m128d __X)
|
||||
}
|
||||
|
||||
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_broadcastsi128_si256(__m128i const *__a)
|
||||
_mm256_broadcastsi128_si256(__m128i __X)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vbroadcastsi256(__a);
|
||||
return (__m256i)__builtin_ia32_vbroadcastsi256(__X);
|
||||
}
|
||||
|
||||
#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \
|
||||
@ -1058,7 +1061,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
|
||||
|
||||
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
|
||||
__m128i __a = (a); \
|
||||
int const *__m = (m); \
|
||||
long long const *__m = (m); \
|
||||
__m128i __i = (i); \
|
||||
__m128i __mask = (mask); \
|
||||
(__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \
|
||||
@ -1066,7 +1069,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
|
||||
|
||||
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \
|
||||
__m256i __a = (a); \
|
||||
int const *__m = (m); \
|
||||
long long const *__m = (m); \
|
||||
__m128i __i = (i); \
|
||||
__m256i __mask = (mask); \
|
||||
(__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \
|
||||
@ -1074,7 +1077,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
|
||||
|
||||
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
|
||||
__m128i __a = (a); \
|
||||
int const *__m = (m); \
|
||||
long long const *__m = (m); \
|
||||
__m128i __i = (i); \
|
||||
__m128i __mask = (mask); \
|
||||
(__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \
|
||||
@ -1082,7 +1085,7 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
|
||||
|
||||
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \
|
||||
__m256i __a = (a); \
|
||||
int const *__m = (m); \
|
||||
long long const *__m = (m); \
|
||||
__m256i __i = (i); \
|
||||
__m256i __mask = (mask); \
|
||||
(__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \
|
||||
@ -1173,29 +1176,31 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
|
||||
(__v4si)_mm_set1_epi32(-1), (s)); })
|
||||
|
||||
#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \
|
||||
int const *__m = (m); \
|
||||
long long const *__m = (m); \
|
||||
__m128i __i = (i); \
|
||||
(__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \
|
||||
(const __v2di *)__m, (__v4si)__i, \
|
||||
(__v2di)_mm_set1_epi64x(-1), (s)); })
|
||||
|
||||
#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \
|
||||
int const *__m = (m); \
|
||||
long long const *__m = (m); \
|
||||
__m128i __i = (i); \
|
||||
(__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \
|
||||
(const __v4di *)__m, (__v4si)__i, \
|
||||
(__v4di)_mm256_set1_epi64x(-1), (s)); })
|
||||
|
||||
#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \
|
||||
int const *__m = (m); \
|
||||
long long const *__m = (m); \
|
||||
__m128i __i = (i); \
|
||||
(__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \
|
||||
(const __v2di *)__m, (__v2di)__i, \
|
||||
(__v2di)_mm_set1_epi64x(-1), (s)); })
|
||||
|
||||
#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \
|
||||
int const *__m = (m); \
|
||||
long long const *__m = (m); \
|
||||
__m256i __i = (i); \
|
||||
(__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \
|
||||
(const __v4di *)__m, (__v4di)__i, \
|
||||
(__v4di)_mm256_set1_epi64x(-1), (s)); })
|
||||
|
||||
#endif /* __AVX2INTRIN_H */
|
||||
|
@ -25,6 +25,9 @@
|
||||
#error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVXINTRIN_H
|
||||
#define __AVXINTRIN_H
|
||||
|
||||
typedef double __v4df __attribute__ ((__vector_size__ (32)));
|
||||
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
|
||||
typedef long long __v4di __attribute__ ((__vector_size__ (32)));
|
||||
@ -432,21 +435,21 @@ static __inline int __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_extract_epi32(__m256i __a, int const __imm)
|
||||
{
|
||||
__v8si __b = (__v8si)__a;
|
||||
return __b[__imm];
|
||||
return __b[__imm & 7];
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_extract_epi16(__m256i __a, int const __imm)
|
||||
{
|
||||
__v16hi __b = (__v16hi)__a;
|
||||
return __b[__imm];
|
||||
return __b[__imm & 15];
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_extract_epi8(__m256i __a, int const __imm)
|
||||
{
|
||||
__v32qi __b = (__v32qi)__a;
|
||||
return __b[__imm];
|
||||
return __b[__imm & 31];
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
@ -454,7 +457,7 @@ static __inline long long __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_extract_epi64(__m256i __a, const int __imm)
|
||||
{
|
||||
__v4di __b = (__v4di)__a;
|
||||
return __b[__imm];
|
||||
return __b[__imm & 3];
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1134,22 +1137,19 @@ _mm256_castsi256_si128(__m256i __a)
|
||||
static __inline __m256d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_castpd128_pd256(__m128d __a)
|
||||
{
|
||||
__m128d __zero = _mm_setzero_pd();
|
||||
return __builtin_shufflevector(__a, __zero, 0, 1, 2, 2);
|
||||
return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
|
||||
}
|
||||
|
||||
static __inline __m256 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_castps128_ps256(__m128 __a)
|
||||
{
|
||||
__m128 __zero = _mm_setzero_ps();
|
||||
return __builtin_shufflevector(__a, __zero, 0, 1, 2, 3, 4, 4, 4, 4);
|
||||
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
|
||||
}
|
||||
|
||||
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm256_castsi128_si256(__m128i __a)
|
||||
{
|
||||
__m128i __zero = _mm_setzero_si128();
|
||||
return __builtin_shufflevector(__a, __zero, 0, 1, 2, 2);
|
||||
return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
|
||||
}
|
||||
|
||||
/* SIMD load ops (unaligned) */
|
||||
@ -1220,3 +1220,5 @@ _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
|
||||
__v128 = _mm256_extractf128_si256(__a, 1);
|
||||
__builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
|
||||
}
|
||||
|
||||
#endif /* __AVXINTRIN_H */
|
||||
|
@ -25,10 +25,132 @@
|
||||
#error this header is for x86 only
|
||||
#endif
|
||||
|
||||
/* Features in %ecx for level 1 */
|
||||
#define bit_SSE3 0x00000001
|
||||
#define bit_PCLMULQDQ 0x00000002
|
||||
#define bit_DTES64 0x00000004
|
||||
#define bit_MONITOR 0x00000008
|
||||
#define bit_DSCPL 0x00000010
|
||||
#define bit_VMX 0x00000020
|
||||
#define bit_SMX 0x00000040
|
||||
#define bit_EIST 0x00000080
|
||||
#define bit_TM2 0x00000100
|
||||
#define bit_SSSE3 0x00000200
|
||||
#define bit_CNXTID 0x00000400
|
||||
#define bit_FMA 0x00001000
|
||||
#define bit_CMPXCHG16B 0x00002000
|
||||
#define bit_xTPR 0x00004000
|
||||
#define bit_PDCM 0x00008000
|
||||
#define bit_PCID 0x00020000
|
||||
#define bit_DCA 0x00040000
|
||||
#define bit_SSE41 0x00080000
|
||||
#define bit_SSE42 0x00100000
|
||||
#define bit_x2APIC 0x00200000
|
||||
#define bit_MOVBE 0x00400000
|
||||
#define bit_POPCNT 0x00800000
|
||||
#define bit_TSCDeadline 0x01000000
|
||||
#define bit_AESNI 0x02000000
|
||||
#define bit_XSAVE 0x04000000
|
||||
#define bit_OSXSAVE 0x08000000
|
||||
#define bit_AVX 0x10000000
|
||||
#define bit_RDRAND 0x40000000
|
||||
|
||||
/* Features in %edx for level 1 */
|
||||
#define bit_FPU 0x00000001
|
||||
#define bit_VME 0x00000002
|
||||
#define bit_DE 0x00000004
|
||||
#define bit_PSE 0x00000008
|
||||
#define bit_TSC 0x00000010
|
||||
#define bit_MSR 0x00000020
|
||||
#define bit_PAE 0x00000040
|
||||
#define bit_MCE 0x00000080
|
||||
#define bit_CX8 0x00000100
|
||||
#define bit_APIC 0x00000200
|
||||
#define bit_SEP 0x00000800
|
||||
#define bit_MTRR 0x00001000
|
||||
#define bit_PGE 0x00002000
|
||||
#define bit_MCA 0x00004000
|
||||
#define bit_CMOV 0x00008000
|
||||
#define bit_PAT 0x00010000
|
||||
#define bit_PSE36 0x00020000
|
||||
#define bit_PSN 0x00040000
|
||||
#define bit_CLFSH 0x00080000
|
||||
#define bit_DS 0x00200000
|
||||
#define bit_ACPI 0x00400000
|
||||
#define bit_MMX 0x00800000
|
||||
#define bit_FXSR 0x01000000
|
||||
#define bit_SSE 0x02000000
|
||||
#define bit_SSE2 0x04000000
|
||||
#define bit_SS 0x08000000
|
||||
#define bit_HTT 0x10000000
|
||||
#define bit_TM 0x20000000
|
||||
#define bit_PBE 0x80000000
|
||||
|
||||
/* Features in %ebx for level 7 sub-leaf 0 */
|
||||
#define bit_FSGSBASE 0x00000001
|
||||
#define bit_SMEP 0x00000080
|
||||
#define bit_ENH_MOVSB 0x00000200
|
||||
|
||||
/* PIC on i386 uses %ebx, so preserve it. */
|
||||
#if __i386__
|
||||
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
|
||||
__asm(" pushl %%ebx\n" \
|
||||
" cpuid\n" \
|
||||
" mov %%ebx,%1\n" \
|
||||
" popl %%ebx" \
|
||||
: "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
|
||||
: "0"(__level))
|
||||
|
||||
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
|
||||
__asm(" pushl %%ebx\n" \
|
||||
" cpuid\n" \
|
||||
" mov %%ebx,%1\n" \
|
||||
" popl %%ebx" \
|
||||
: "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
|
||||
: "0"(__level), "2"(__count))
|
||||
#else
|
||||
#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
|
||||
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
|
||||
: "0"(__level))
|
||||
|
||||
#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
|
||||
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
|
||||
: "0"(__level), "2"(__count))
|
||||
#endif
|
||||
|
||||
static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
|
||||
unsigned int *__ebx, unsigned int *__ecx,
|
||||
unsigned int *__edx) {
|
||||
__asm("cpuid" : "=a"(*__eax), "=b" (*__ebx), "=c"(*__ecx), "=d"(*__edx)
|
||||
: "0"(__level));
|
||||
__cpuid(__level, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static __inline int __get_cpuid_max (unsigned int __level, unsigned int *__sig)
|
||||
{
|
||||
unsigned int __eax, __ebx, __ecx, __edx;
|
||||
#if __i386__
|
||||
int __cpuid_supported;
|
||||
|
||||
__asm(" pushfl\n"
|
||||
" popl %%eax\n"
|
||||
" movl %%eax,%%ecx\n"
|
||||
" xorl $0x00200000,%%eax\n"
|
||||
" pushl %%eax\n"
|
||||
" popfl\n"
|
||||
" pushfl\n"
|
||||
" popl %%eax\n"
|
||||
" movl $0,%0\n"
|
||||
" cmpl %%eax,%%ecx\n"
|
||||
" je 1f\n"
|
||||
" movl $1,%0\n"
|
||||
"1:"
|
||||
: "=r" (__cpuid_supported) : : "eax", "ecx");
|
||||
if (!__cpuid_supported)
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
__cpuid(__level, __eax, __ebx, __ecx, __edx);
|
||||
if (__sig)
|
||||
*__sig = __ebx;
|
||||
return __eax;
|
||||
}
|
||||
|
@ -245,13 +245,15 @@ _mm_cmple_sd(__m128d __a, __m128d __b)
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cmpgt_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_cmpsd(__b, __a, 1);
|
||||
__m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
|
||||
return (__m128d) { __c[0], __a[1] };
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cmpge_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_cmpsd(__b, __a, 2);
|
||||
__m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
|
||||
return (__m128d) { __c[0], __a[1] };
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
@ -287,13 +289,15 @@ _mm_cmpnle_sd(__m128d __a, __m128d __b)
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cmpngt_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_cmpsd(__b, __a, 5);
|
||||
__m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
|
||||
return (__m128d) { __c[0], __a[1] };
|
||||
}
|
||||
|
||||
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cmpnge_sd(__m128d __a, __m128d __b)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_cmpsd(__b, __a, 6);
|
||||
__m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
|
||||
return (__m128d) { __c[0], __a[1] };
|
||||
}
|
||||
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||
@ -822,7 +826,9 @@ _mm_xor_si128(__m128i __a, __m128i __b)
|
||||
}
|
||||
|
||||
#define _mm_slli_si128(a, count) __extension__ ({ \
|
||||
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
|
||||
__m128i __a = (a); \
|
||||
_Pragma("clang diagnostic pop"); \
|
||||
(__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); })
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
@ -887,7 +893,9 @@ _mm_sra_epi32(__m128i __a, __m128i __count)
|
||||
|
||||
|
||||
#define _mm_srli_si128(a, count) __extension__ ({ \
|
||||
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
|
||||
__m128i __a = (a); \
|
||||
_Pragma("clang diagnostic pop"); \
|
||||
(__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); })
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
@ -1210,6 +1218,14 @@ _mm_stream_si32(int *__p, int __a)
|
||||
__builtin_ia32_movnti(__p, __a);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_stream_si64(long long *__p, long long __a)
|
||||
{
|
||||
__builtin_ia32_movnti64(__p, __a);
|
||||
}
|
||||
#endif
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_clflush(void const *__p)
|
||||
{
|
||||
@ -1250,7 +1266,7 @@ static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_extract_epi16(__m128i __a, int __imm)
|
||||
{
|
||||
__v8hi __b = (__v8hi)__a;
|
||||
return (unsigned short)__b[__imm];
|
||||
return (unsigned short)__b[__imm & 7];
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
@ -1268,20 +1284,26 @@ _mm_movemask_epi8(__m128i __a)
|
||||
}
|
||||
|
||||
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
|
||||
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
|
||||
__m128i __a = (a); \
|
||||
_Pragma("clang diagnostic pop"); \
|
||||
(__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \
|
||||
(imm) & 0x3, ((imm) & 0xc) >> 2, \
|
||||
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
|
||||
|
||||
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
|
||||
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
|
||||
__m128i __a = (a); \
|
||||
_Pragma("clang diagnostic pop"); \
|
||||
(__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
|
||||
(imm) & 0x3, ((imm) & 0xc) >> 2, \
|
||||
((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
|
||||
4, 5, 6, 7); })
|
||||
|
||||
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
|
||||
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
|
||||
__m128i __a = (a); \
|
||||
_Pragma("clang diagnostic pop"); \
|
||||
(__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
|
||||
0, 1, 2, 3, \
|
||||
4 + (((imm) & 0x03) >> 0), \
|
||||
@ -1344,7 +1366,7 @@ _mm_movepi64_pi64(__m128i __a)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_movpi64_pi64(__m64 __a)
|
||||
_mm_movpi64_epi64(__m64 __a)
|
||||
{
|
||||
return (__m128i){ (long long)__a, 0 };
|
||||
}
|
||||
@ -1374,8 +1396,10 @@ _mm_movemask_pd(__m128d __a)
|
||||
}
|
||||
|
||||
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
|
||||
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
|
||||
__m128d __a = (a); \
|
||||
__m128d __b = (b); \
|
||||
_Pragma("clang diagnostic pop"); \
|
||||
__builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); })
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*===---- f16cintrin.h - F16C intrinsics ---------------------------------===
|
||||
/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining __a copy
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
|
@ -111,4 +111,8 @@ _xtest(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __SHA__
|
||||
#include <shaintrin.h>
|
||||
#endif
|
||||
|
||||
#endif /* __IMMINTRIN_H */
|
||||
|
@ -87,8 +87,10 @@
|
||||
#define CHAR_MAX __SCHAR_MAX__
|
||||
#endif
|
||||
|
||||
/* C99 5.2.4.2.1: Added long long. */
|
||||
#if __STDC_VERSION__ >= 199901
|
||||
/* C99 5.2.4.2.1: Added long long.
|
||||
C++11 18.3.3.2: same contents as the Standard C Library header <limits.h>.
|
||||
*/
|
||||
#if __STDC_VERSION__ >= 199901 || __cplusplus >= 201103L
|
||||
|
||||
#undef LLONG_MIN
|
||||
#undef LLONG_MAX
|
||||
|
@ -4,6 +4,16 @@ module _Builtin_intrinsics [system] {
|
||||
header "altivec.h"
|
||||
}
|
||||
|
||||
explicit module arm {
|
||||
requires arm
|
||||
|
||||
explicit module neon {
|
||||
requires neon
|
||||
header "arm_neon.h"
|
||||
export *
|
||||
}
|
||||
}
|
||||
|
||||
explicit module intel {
|
||||
requires x86
|
||||
export *
|
||||
@ -34,6 +44,7 @@ module _Builtin_intrinsics [system] {
|
||||
explicit module sse {
|
||||
requires sse
|
||||
export mmx
|
||||
export * // note: for hackish <emmintrin.h> dependency
|
||||
header "xmmintrin.h"
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,9 @@
|
||||
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __PRFCHWINTRIN_H
|
||||
#define __PRFCHWINTRIN_H
|
||||
|
||||
#if defined(__PRFCHW__) || defined(__3dNOW__)
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||
_m_prefetchw(void *__P)
|
||||
@ -32,3 +35,5 @@ _m_prefetchw(void *__P)
|
||||
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __PRFCHWINTRIN_H */
|
||||
|
@ -25,6 +25,9 @@
|
||||
#error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __RDSEEDINTRIN_H
|
||||
#define __RDSEEDINTRIN_H
|
||||
|
||||
#ifdef __RDSEED__
|
||||
static __inline__ int __attribute__((__always_inline__, __nodebug__))
|
||||
_rdseed16_step(unsigned short *__p)
|
||||
@ -46,3 +49,4 @@ _rdseed64_step(unsigned long long *__p)
|
||||
}
|
||||
#endif
|
||||
#endif /* __RDSEED__ */
|
||||
#endif /* __RDSEEDINTRIN_H */
|
||||
|
@ -25,6 +25,9 @@
|
||||
#error "Never use <rtmintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __RTMINTRIN_H
|
||||
#define __RTMINTRIN_H
|
||||
|
||||
#define _XBEGIN_STARTED (~0u)
|
||||
#define _XABORT_EXPLICIT (1 << 0)
|
||||
#define _XABORT_RETRY (1 << 1)
|
||||
@ -47,3 +50,5 @@ _xend(void)
|
||||
}
|
||||
|
||||
#define _xabort(imm) __builtin_ia32_xabort((imm))
|
||||
|
||||
#endif /* __RTMINTRIN_H */
|
||||
|
74
python/clang_includes/shaintrin.h
Normal file
74
python/clang_includes/shaintrin.h
Normal file
@ -0,0 +1,74 @@
|
||||
/*===---- shaintrin.h - SHA intrinsics -------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <shaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __SHAINTRIN_H
|
||||
#define __SHAINTRIN_H
|
||||
|
||||
#if !defined (__SHA__)
|
||||
# error "SHA instructions not enabled"
|
||||
#endif
|
||||
|
||||
#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \
|
||||
__builtin_ia32_sha1rnds4((V1), (V2), (M)); })
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return __builtin_ia32_sha1nexte(__X, __Y);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return __builtin_ia32_sha1msg1(__X, __Y);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return __builtin_ia32_sha1msg2(__X, __Y);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return __builtin_ia32_sha256rnds2(__X, __Y, __Z);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return __builtin_ia32_sha256msg1(__X, __Y);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)
|
||||
{
|
||||
return __builtin_ia32_sha256msg2(__X, __Y);
|
||||
}
|
||||
|
||||
#endif /* __SHAINTRIN_H */
|
@ -197,7 +197,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
#define _mm_extract_ps(X, N) (__extension__ \
|
||||
({ union { int __i; float __f; } __t; \
|
||||
__v4sf __a = (__v4sf)(X); \
|
||||
__t.__f = __a[N]; \
|
||||
__t.__f = __a[(N) & 3]; \
|
||||
__t.__i;}))
|
||||
|
||||
/* Miscellaneous insert and extract macros. */
|
||||
@ -215,14 +215,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
|
||||
/* Insert int into packed integer array at index. */
|
||||
#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
|
||||
__a[(N)] = (I); \
|
||||
__a[(N) & 15] = (I); \
|
||||
__a;}))
|
||||
#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
|
||||
__a[(N)] = (I); \
|
||||
__a[(N) & 3] = (I); \
|
||||
__a;}))
|
||||
#ifdef __x86_64__
|
||||
#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
|
||||
__a[(N)] = (I); \
|
||||
__a[(N) & 1] = (I); \
|
||||
__a;}))
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
@ -230,12 +230,13 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
|
||||
* as a zero extended value, so it is unsigned.
|
||||
*/
|
||||
#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
|
||||
(unsigned char)__a[(N)];}))
|
||||
(int)(unsigned char) \
|
||||
__a[(N) & 15];}))
|
||||
#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
|
||||
(unsigned)__a[(N)];}))
|
||||
__a[(N) & 3];}))
|
||||
#ifdef __x86_64__
|
||||
#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
|
||||
__a[(N)];}))
|
||||
__a[(N) & 1];}))
|
||||
#endif /* __x86_64 */
|
||||
|
||||
/* SSE4 128-bit Packed Integer Comparisons. */
|
||||
|
158
python/clang_includes/tbmintrin.h
Normal file
158
python/clang_includes/tbmintrin.h
Normal file
@ -0,0 +1,158 @@
|
||||
/*===---- tbmintrin.h - TBM intrinsics -------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __TBM__
|
||||
#error "TBM instruction set is not enabled"
|
||||
#endif
|
||||
|
||||
#ifndef __X86INTRIN_H
|
||||
#error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __TBMINTRIN_H
|
||||
#define __TBMINTRIN_H
|
||||
|
||||
#define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b)))
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__blcfill_u32(unsigned int a)
|
||||
{
|
||||
return a & (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__blci_u32(unsigned int a)
|
||||
{
|
||||
return a | ~(a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__blcic_u32(unsigned int a)
|
||||
{
|
||||
return ~a & (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__blcmsk_u32(unsigned int a)
|
||||
{
|
||||
return a ^ (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__blcs_u32(unsigned int a)
|
||||
{
|
||||
return a | (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__blsfill_u32(unsigned int a)
|
||||
{
|
||||
return a | (a - 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__blsic_u32(unsigned int a)
|
||||
{
|
||||
return ~a | (a - 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__t1mskc_u32(unsigned int a)
|
||||
{
|
||||
return ~a | (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
|
||||
__tzmsk_u32(unsigned int a)
|
||||
{
|
||||
return ~a & (a - 1);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b)))
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__blcfill_u64(unsigned long long a)
|
||||
{
|
||||
return a & (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__blci_u64(unsigned long long a)
|
||||
{
|
||||
return a | ~(a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__blcic_u64(unsigned long long a)
|
||||
{
|
||||
return ~a & (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__blcmsk_u64(unsigned long long a)
|
||||
{
|
||||
return a ^ (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__blcs_u64(unsigned long long a)
|
||||
{
|
||||
return a | (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__blsfill_u64(unsigned long long a)
|
||||
{
|
||||
return a | (a - 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__blsic_u64(unsigned long long a)
|
||||
{
|
||||
return ~a | (a - 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__t1mskc_u64(unsigned long long a)
|
||||
{
|
||||
return ~a | (a + 1);
|
||||
}
|
||||
|
||||
static __inline__ unsigned long long __attribute__((__always_inline__,
|
||||
__nodebug__))
|
||||
__tzmsk_u64(unsigned long long a)
|
||||
{
|
||||
return ~a & (a - 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __TBMINTRIN_H */
|
@ -1340,15 +1340,15 @@ static long double _Complex
|
||||
|
||||
// creal
|
||||
|
||||
static float _Complex
|
||||
static float
|
||||
_TG_ATTRS
|
||||
__tg_creal(float __x) {return __x;}
|
||||
|
||||
static double _Complex
|
||||
static double
|
||||
_TG_ATTRS
|
||||
__tg_creal(double __x) {return __x;}
|
||||
|
||||
static long double _Complex
|
||||
static long double
|
||||
_TG_ATTRS
|
||||
__tg_creal(long double __x) {return __x;}
|
||||
|
||||
|
@ -27,8 +27,8 @@
|
||||
#define __CLANG_UNWIND_H
|
||||
|
||||
#if __has_include_next(<unwind.h>)
|
||||
/* Darwin and libunwind provide an unwind.h. If that's available, use
|
||||
* it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
|
||||
/* Darwin (from 11.x on) and libunwind provide an unwind.h. If that's available,
|
||||
* use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
|
||||
* so define that around the include.*/
|
||||
# ifndef _GNU_SOURCE
|
||||
# define _SHOULD_UNDEFINE_GNU_SOURCE
|
||||
@ -66,7 +66,17 @@ extern "C" {
|
||||
#pragma GCC visibility push(default)
|
||||
#endif
|
||||
|
||||
typedef uintptr_t _Unwind_Word;
|
||||
typedef intptr_t _Unwind_Sword;
|
||||
typedef uintptr_t _Unwind_Ptr;
|
||||
typedef uintptr_t _Unwind_Internal_Ptr;
|
||||
typedef uint64_t _Unwind_Exception_Class;
|
||||
|
||||
typedef intptr_t _sleb128_t;
|
||||
typedef uintptr_t _uleb128_t;
|
||||
|
||||
struct _Unwind_Context;
|
||||
struct _Unwind_Exception;
|
||||
typedef enum {
|
||||
_URC_NO_REASON = 0,
|
||||
_URC_FOREIGN_EXCEPTION_CAUGHT = 1,
|
||||
@ -81,8 +91,43 @@ typedef enum {
|
||||
_URC_CONTINUE_UNWIND = 8
|
||||
} _Unwind_Reason_Code;
|
||||
|
||||
typedef enum {
|
||||
_UA_SEARCH_PHASE = 1,
|
||||
_UA_CLEANUP_PHASE = 2,
|
||||
|
||||
#ifdef __arm__
|
||||
_UA_HANDLER_FRAME = 4,
|
||||
_UA_FORCE_UNWIND = 8,
|
||||
_UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */
|
||||
} _Unwind_Action;
|
||||
|
||||
typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,
|
||||
struct _Unwind_Exception *);
|
||||
|
||||
struct _Unwind_Exception {
|
||||
_Unwind_Exception_Class exception_class;
|
||||
_Unwind_Exception_Cleanup_Fn exception_cleanup;
|
||||
_Unwind_Word private_1;
|
||||
_Unwind_Word private_2;
|
||||
/* The Itanium ABI requires that _Unwind_Exception objects are "double-word
|
||||
* aligned". GCC has interpreted this to mean "use the maximum useful
|
||||
* alignment for the target"; so do we. */
|
||||
} __attribute__((__aligned__));
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,
|
||||
_Unwind_Exception_Class,
|
||||
struct _Unwind_Exception *,
|
||||
struct _Unwind_Context *,
|
||||
void *);
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
|
||||
int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *,
|
||||
struct _Unwind_Context *);
|
||||
typedef _Unwind_Personality_Fn __personality_routine;
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,
|
||||
void *);
|
||||
|
||||
#if defined(__arm__) && !defined(__APPLE__)
|
||||
|
||||
typedef enum {
|
||||
_UVRSC_CORE = 0, /* integer register */
|
||||
@ -111,14 +156,116 @@ _Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,
|
||||
_Unwind_VRS_DataRepresentation __representation,
|
||||
void *__valuep);
|
||||
|
||||
#else
|
||||
_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,
|
||||
_Unwind_VRS_RegClass __regclass,
|
||||
uint32_t __regno,
|
||||
_Unwind_VRS_DataRepresentation __representation,
|
||||
void *__valuep);
|
||||
|
||||
uintptr_t _Unwind_GetIP(struct _Unwind_Context* __context);
|
||||
static __inline__
|
||||
_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {
|
||||
_Unwind_Word __value;
|
||||
_Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);
|
||||
return __value;
|
||||
}
|
||||
|
||||
static __inline__
|
||||
void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,
|
||||
_Unwind_Word __value) {
|
||||
_Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);
|
||||
}
|
||||
|
||||
static __inline__
|
||||
_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {
|
||||
_Unwind_Word __ip = _Unwind_GetGR(__context, 15);
|
||||
return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */
|
||||
}
|
||||
|
||||
static __inline__
|
||||
void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {
|
||||
_Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;
|
||||
_Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);
|
||||
}
|
||||
#else
|
||||
_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);
|
||||
void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);
|
||||
|
||||
_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);
|
||||
void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);
|
||||
#endif
|
||||
|
||||
|
||||
_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);
|
||||
|
||||
_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);
|
||||
|
||||
void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);
|
||||
|
||||
_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
|
||||
|
||||
/* DWARF EH functions; currently not available on Darwin/ARM */
|
||||
#if !defined(__APPLE__) || !defined(__arm__)
|
||||
|
||||
_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *,
|
||||
_Unwind_Stop_Fn, void *);
|
||||
void _Unwind_DeleteException(struct _Unwind_Exception *);
|
||||
void _Unwind_Resume(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *);
|
||||
|
||||
#endif
|
||||
|
||||
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
|
||||
|
||||
/* setjmp(3)/longjmp(3) stuff */
|
||||
typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;
|
||||
|
||||
void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);
|
||||
void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *,
|
||||
_Unwind_Stop_Fn, void *);
|
||||
void _Unwind_SjLj_Resume(struct _Unwind_Exception *);
|
||||
_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *);
|
||||
|
||||
void *_Unwind_FindEnclosingFunction(void *);
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
||||
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)
|
||||
__attribute__((unavailable));
|
||||
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)
|
||||
__attribute__((unavailable));
|
||||
|
||||
/* Darwin-specific functions */
|
||||
void __register_frame(const void *);
|
||||
void __deregister_frame(const void *);
|
||||
|
||||
struct dwarf_eh_bases {
|
||||
uintptr_t tbase;
|
||||
uintptr_t dbase;
|
||||
uintptr_t func;
|
||||
};
|
||||
void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);
|
||||
|
||||
void __register_frame_info_bases(const void *, void *, void *, void *)
|
||||
__attribute__((unavailable));
|
||||
void __register_frame_info(const void *, void *) __attribute__((unavailable));
|
||||
void __register_frame_info_table_bases(const void *, void*, void *, void *)
|
||||
__attribute__((unavailable));
|
||||
void __register_frame_info_table(const void *, void *)
|
||||
__attribute__((unavailable));
|
||||
void __register_frame_table(const void *) __attribute__((unavailable));
|
||||
void __deregister_frame_info(const void *) __attribute__((unavailable));
|
||||
void __deregister_frame_info_bases(const void *)__attribute__((unavailable));
|
||||
|
||||
#else
|
||||
|
||||
_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);
|
||||
_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);
|
||||
|
||||
#endif
|
||||
|
||||
typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context*, void*);
|
||||
_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void*);
|
||||
|
||||
#ifndef HIDE_EXPORTS
|
||||
#pragma GCC visibility pop
|
||||
|
@ -66,6 +66,10 @@
|
||||
#include <xopintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __TBM__
|
||||
#include <tbmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __F16C__
|
||||
#include <f16cintrin.h>
|
||||
#endif
|
||||
|
@ -218,7 +218,9 @@ _mm_cmple_ps(__m128 __a, __m128 __b)
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cmpgt_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
return (__m128)__builtin_ia32_cmpss(__b, __a, 1);
|
||||
return (__m128)__builtin_shufflevector(__a,
|
||||
__builtin_ia32_cmpss(__b, __a, 1),
|
||||
4, 1, 2, 3);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
@ -230,7 +232,9 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b)
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cmpge_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
return (__m128)__builtin_ia32_cmpss(__b, __a, 2);
|
||||
return (__m128)__builtin_shufflevector(__a,
|
||||
__builtin_ia32_cmpss(__b, __a, 2),
|
||||
4, 1, 2, 3);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
@ -278,7 +282,9 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b)
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cmpngt_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
return (__m128)__builtin_ia32_cmpss(__b, __a, 5);
|
||||
return (__m128)__builtin_shufflevector(__a,
|
||||
__builtin_ia32_cmpss(__b, __a, 5),
|
||||
4, 1, 2, 3);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
@ -290,7 +296,9 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b)
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_cmpnge_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
return (__m128)__builtin_ia32_cmpss(__b, __a, 6);
|
||||
return (__m128)__builtin_shufflevector(__a,
|
||||
__builtin_ia32_cmpss(__b, __a, 6),
|
||||
4, 1, 2, 3);
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
@ -983,12 +991,10 @@ do { \
|
||||
#define _m_ _mm_
|
||||
#define _m_ _mm_
|
||||
|
||||
#if !__has_feature(modules)
|
||||
/* Ugly hack for backwards-compatibility (compatible with gcc) */
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* __SSE__ */
|
||||
|
||||
|
@ -342,6 +342,399 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
|
||||
__m128i __B = (B); \
|
||||
(__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); })
|
||||
|
||||
#define _MM_PCOMCTRL_LT 0
|
||||
#define _MM_PCOMCTRL_LE 1
|
||||
#define _MM_PCOMCTRL_GT 2
|
||||
#define _MM_PCOMCTRL_GE 3
|
||||
#define _MM_PCOMCTRL_EQ 4
|
||||
#define _MM_PCOMCTRL_NEQ 5
|
||||
#define _MM_PCOMCTRL_FALSE 6
|
||||
#define _MM_PCOMCTRL_TRUE 7
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comlt_epu8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comle_epu8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comgt_epu8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comge_epu8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comeq_epu8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comneq_epu8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comfalse_epu8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comtrue_epu8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comlt_epu16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comle_epu16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comgt_epu16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comge_epu16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comeq_epu16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comneq_epu16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comfalse_epu16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comtrue_epu16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comlt_epu32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comle_epu32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comgt_epu32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comge_epu32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comeq_epu32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comneq_epu32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comfalse_epu32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comtrue_epu32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comlt_epu64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comle_epu64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comgt_epu64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comge_epu64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comeq_epu64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comneq_epu64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comfalse_epu64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comtrue_epu64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comlt_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comle_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comgt_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comge_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comeq_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comneq_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comfalse_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comtrue_epi8(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comlt_epi16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comle_epi16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comgt_epi16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comge_epi16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comeq_epi16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comneq_epi16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comfalse_epi16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comtrue_epi16(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comlt_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comle_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comgt_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comge_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comeq_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comneq_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comfalse_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comtrue_epi32(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comlt_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comle_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comgt_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comge_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comeq_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comneq_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comfalse_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_comtrue_epi64(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
|
||||
}
|
||||
|
||||
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
|
||||
__m128d __X = (X); \
|
||||
__m128d __Y = (Y); \
|
||||
|
Loading…
x
Reference in New Issue
Block a user