From c62bb98f4f4ca52fcb765bb81b526ceb433ae157 Mon Sep 17 00:00:00 2001 From: Nicholas Hutchinson Date: Tue, 1 Apr 2014 21:27:54 +0100 Subject: [PATCH] Add builtin includes for AppleClang 5.1 Copied from Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib --- python/clang/5.1/include/Intrin.h | 784 + python/clang/5.1/include/__wmmintrin_aes.h | 67 + python/clang/5.1/include/__wmmintrin_pclmul.h | 34 + python/clang/5.1/include/aarch64_simd.h | 7377 ++++++++++ python/clang/5.1/include/altivec.h | 11856 ++++++++++++++++ python/clang/5.1/include/ammintrin.h | 68 + python/clang/5.1/include/arm_neon.h | 7342 ++++++++++ python/clang/5.1/include/avx2intrin.h | 1206 ++ python/clang/5.1/include/avxintrin.h | 1224 ++ python/clang/5.1/include/bmi2intrin.h | 94 + python/clang/5.1/include/bmiintrin.h | 115 + python/clang/5.1/include/cpuid.h | 156 + python/clang/5.1/include/emmintrin.h | 1451 ++ python/clang/5.1/include/f16cintrin.h | 58 + python/clang/5.1/include/float.h | 124 + python/clang/5.1/include/fma4intrin.h | 231 + python/clang/5.1/include/fmaintrin.h | 229 + python/clang/5.1/include/immintrin.h | 118 + python/clang/5.1/include/iso646.h | 43 + python/clang/5.1/include/limits.h | 119 + python/clang/5.1/include/lzcntintrin.h | 55 + python/clang/5.1/include/mm3dnow.h | 162 + python/clang/5.1/include/mm_malloc.h | 75 + python/clang/5.1/include/mmintrin.h | 503 + python/clang/5.1/include/module.map | 165 + python/clang/5.1/include/nmmintrin.h | 35 + python/clang/5.1/include/pmmintrin.h | 117 + python/clang/5.1/include/popcntintrin.h | 45 + python/clang/5.1/include/prfchwintrin.h | 39 + python/clang/5.1/include/rdseedintrin.h | 52 + python/clang/5.1/include/rtmintrin.h | 54 + .../5.1/include/sanitizer/asan_interface.h | 137 + .../include/sanitizer/common_interface_defs.h | 54 + .../5.1/include/sanitizer/dfsan_interface.h | 87 + .../include/sanitizer/linux_syscall_hooks.h | 3070 ++++ .../5.1/include/sanitizer/lsan_interface.h | 52 + 
.../5.1/include/sanitizer/msan_interface.h | 162 + python/clang/5.1/include/shaintrin.h | 74 + python/clang/5.1/include/smmintrin.h | 468 + python/clang/5.1/include/stdalign.h | 35 + python/clang/5.1/include/stdarg.h | 50 + python/clang/5.1/include/stdbool.h | 44 + python/clang/5.1/include/stddef.h | 102 + python/clang/5.1/include/stdint.h | 708 + python/clang/5.1/include/stdnoreturn.h | 30 + python/clang/5.1/include/tbmintrin.h | 158 + python/clang/5.1/include/tgmath.h | 1374 ++ python/clang/5.1/include/tmmintrin.h | 225 + python/clang/5.1/include/unwind.h | 280 + python/clang/5.1/include/varargs.h | 26 + python/clang/5.1/include/wmmintrin.h | 42 + python/clang/5.1/include/x86intrin.h | 79 + python/clang/5.1/include/xmmintrin.h | 1001 ++ python/clang/5.1/include/xopintrin.h | 804 ++ 54 files changed, 43060 insertions(+) create mode 100644 python/clang/5.1/include/Intrin.h create mode 100644 python/clang/5.1/include/__wmmintrin_aes.h create mode 100644 python/clang/5.1/include/__wmmintrin_pclmul.h create mode 100644 python/clang/5.1/include/aarch64_simd.h create mode 100644 python/clang/5.1/include/altivec.h create mode 100644 python/clang/5.1/include/ammintrin.h create mode 100644 python/clang/5.1/include/arm_neon.h create mode 100644 python/clang/5.1/include/avx2intrin.h create mode 100644 python/clang/5.1/include/avxintrin.h create mode 100644 python/clang/5.1/include/bmi2intrin.h create mode 100644 python/clang/5.1/include/bmiintrin.h create mode 100644 python/clang/5.1/include/cpuid.h create mode 100644 python/clang/5.1/include/emmintrin.h create mode 100644 python/clang/5.1/include/f16cintrin.h create mode 100644 python/clang/5.1/include/float.h create mode 100644 python/clang/5.1/include/fma4intrin.h create mode 100644 python/clang/5.1/include/fmaintrin.h create mode 100644 python/clang/5.1/include/immintrin.h create mode 100644 python/clang/5.1/include/iso646.h create mode 100644 python/clang/5.1/include/limits.h create mode 100644 
python/clang/5.1/include/lzcntintrin.h create mode 100644 python/clang/5.1/include/mm3dnow.h create mode 100644 python/clang/5.1/include/mm_malloc.h create mode 100644 python/clang/5.1/include/mmintrin.h create mode 100644 python/clang/5.1/include/module.map create mode 100644 python/clang/5.1/include/nmmintrin.h create mode 100644 python/clang/5.1/include/pmmintrin.h create mode 100644 python/clang/5.1/include/popcntintrin.h create mode 100644 python/clang/5.1/include/prfchwintrin.h create mode 100644 python/clang/5.1/include/rdseedintrin.h create mode 100644 python/clang/5.1/include/rtmintrin.h create mode 100644 python/clang/5.1/include/sanitizer/asan_interface.h create mode 100644 python/clang/5.1/include/sanitizer/common_interface_defs.h create mode 100644 python/clang/5.1/include/sanitizer/dfsan_interface.h create mode 100644 python/clang/5.1/include/sanitizer/linux_syscall_hooks.h create mode 100644 python/clang/5.1/include/sanitizer/lsan_interface.h create mode 100644 python/clang/5.1/include/sanitizer/msan_interface.h create mode 100644 python/clang/5.1/include/shaintrin.h create mode 100644 python/clang/5.1/include/smmintrin.h create mode 100644 python/clang/5.1/include/stdalign.h create mode 100644 python/clang/5.1/include/stdarg.h create mode 100644 python/clang/5.1/include/stdbool.h create mode 100644 python/clang/5.1/include/stddef.h create mode 100644 python/clang/5.1/include/stdint.h create mode 100644 python/clang/5.1/include/stdnoreturn.h create mode 100644 python/clang/5.1/include/tbmintrin.h create mode 100644 python/clang/5.1/include/tgmath.h create mode 100644 python/clang/5.1/include/tmmintrin.h create mode 100644 python/clang/5.1/include/unwind.h create mode 100644 python/clang/5.1/include/varargs.h create mode 100644 python/clang/5.1/include/wmmintrin.h create mode 100644 python/clang/5.1/include/x86intrin.h create mode 100644 python/clang/5.1/include/xmmintrin.h create mode 100644 python/clang/5.1/include/xopintrin.h diff --git 
a/python/clang/5.1/include/Intrin.h b/python/clang/5.1/include/Intrin.h
new file mode 100644
index 00000000..43764647
--- /dev/null
+++ b/python/clang/5.1/include/Intrin.h
@@ -0,0 +1,784 @@
+/* ===-------- Intrin.h ---------------------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/* Only use this header when _MSC_VER is defined; else chain to the next one. */
+#ifndef _MSC_VER
+#include_next <Intrin.h>
+#else
+
+#ifndef __INTRIN_H
+#define __INTRIN_H
+
+/* First include the standard intrinsics. */
+#include <x86intrin.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* And the random ones that aren't in those files. */
+__m64 _m_from_float(float);
+__m64 _m_from_int(int _l);
+void _m_prefetch(void *);
+float _m_to_float(__m64);
+int _m_to_int(__m64 _M);
+
+/* Other assorted instruction intrinsics.
*/ +void __addfsbyte(unsigned long, unsigned char); +void __addfsdword(unsigned long, unsigned long); +void __addfsword(unsigned long, unsigned short); +void __code_seg(const char *); +void __cpuid(int[4], int); +void __cpuidex(int[4], int, int); +void __debugbreak(void); +__int64 __emul(int, int); +unsigned __int64 __emulu(unsigned int, unsigned int); +void __cdecl __fastfail(unsigned int); +unsigned int __getcallerseflags(void); +void __halt(void); +unsigned char __inbyte(unsigned short); +void __inbytestring(unsigned short, unsigned char *, unsigned long); +void __incfsbyte(unsigned long); +void __incfsdword(unsigned long); +void __incfsword(unsigned long); +unsigned long __indword(unsigned short); +void __indwordstring(unsigned short, unsigned long *, unsigned long); +void __int2c(void); +void __invlpg(void *); +unsigned short __inword(unsigned short); +void __inwordstring(unsigned short, unsigned short *, unsigned long); +void __lidt(void *); +unsigned __int64 __ll_lshift(unsigned __int64, int); +__int64 __ll_rshift(__int64, int); +void __llwpcb(void *); +unsigned char __lwpins32(unsigned int, unsigned int, unsigned int); +void __lwpval32(unsigned int, unsigned int, unsigned int); +unsigned int __lzcnt(unsigned int); +unsigned short __lzcnt16(unsigned short); +void __movsb(unsigned char *, unsigned char const *, size_t); +void __movsd(unsigned long *, unsigned long const *, size_t); +void __movsw(unsigned short *, unsigned short const *, size_t); +void __nop(void); +void __nvreg_restore_fence(void); +void __nvreg_save_fence(void); +void __outbyte(unsigned short, unsigned char); +void __outbytestring(unsigned short, unsigned char *, unsigned long); +void __outdword(unsigned short, unsigned long); +void __outdwordstring(unsigned short, unsigned long *, unsigned long); +void __outword(unsigned short, unsigned short); +void __outwordstring(unsigned short, unsigned short *, unsigned long); +static __inline__ +unsigned int __popcnt(unsigned int); +static __inline__ 
+unsigned short __popcnt16(unsigned short); +unsigned __int64 __rdtsc(void); +unsigned __int64 __rdtscp(unsigned int *); +unsigned long __readcr0(void); +unsigned long __readcr2(void); +unsigned long __readcr3(void); +unsigned long __readcr5(void); +unsigned long __readcr8(void); +unsigned int __readdr(unsigned int); +unsigned int __readeflags(void); +unsigned char __readfsbyte(unsigned long); +unsigned long __readfsdword(unsigned long); +unsigned __int64 __readfsqword(unsigned long); +unsigned short __readfsword(unsigned long); +unsigned __int64 __readmsr(unsigned long); +unsigned __int64 __readpmc(unsigned long); +unsigned long __segmentlimit(unsigned long); +void __sidt(void *); +void *__slwpcb(void); +void __stosb(unsigned char *, unsigned char, size_t); +void __stosd(unsigned long *, unsigned long, size_t); +void __stosw(unsigned short *, unsigned short, size_t); +void __svm_clgi(void); +void __svm_invlpga(void *, int); +void __svm_skinit(int); +void __svm_stgi(void); +void __svm_vmload(size_t); +void __svm_vmrun(size_t); +void __svm_vmsave(size_t); +void __ud2(void); +unsigned __int64 __ull_rshift(unsigned __int64, int); +void __vmx_off(void); +void __vmx_vmptrst(unsigned __int64 *); +void __wbinvd(void); +void __writecr0(unsigned int); +void __writecr3(unsigned int); +void __writecr4(unsigned int); +void __writecr8(unsigned int); +void __writedr(unsigned int, unsigned int); +void __writeeflags(unsigned int); +void __writefsbyte(unsigned long, unsigned char); +void __writefsdword(unsigned long, unsigned long); +void __writefsqword(unsigned long, unsigned __int64); +void __writefsword(unsigned long, unsigned short); +void __writemsr(unsigned long, unsigned __int64); +static __inline__ +void *_AddressOfReturnAddress(void); +unsigned int _andn_u32(unsigned int, unsigned int); +unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); +unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int); +unsigned int _bextri_u32(unsigned int, unsigned 
int); +static __inline__ +unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); +static __inline__ +unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); +static __inline__ +unsigned char _bittest(long const *, long); +static __inline__ +unsigned char _bittestandcomplement(long *, long); +static __inline__ +unsigned char _bittestandreset(long *, long); +static __inline__ +unsigned char _bittestandset(long *, long); +unsigned int _blcfill_u32(unsigned int); +unsigned int _blci_u32(unsigned int); +unsigned int _blcic_u32(unsigned int); +unsigned int _blcmsk_u32(unsigned int); +unsigned int _blcs_u32(unsigned int); +unsigned int _blsfill_u32(unsigned int); +unsigned int _blsi_u32(unsigned int); +unsigned int _blsic_u32(unsigned int); +unsigned int _blsmsk_u32(unsigned int); +unsigned int _blsmsk_u32(unsigned int); +unsigned int _blsr_u32(unsigned int); +unsigned int _blsr_u32(unsigned int); +unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64); +unsigned long __cdecl _byteswap_ulong(unsigned long); +unsigned short __cdecl _byteswap_ushort(unsigned short); +unsigned _bzhi_u32(unsigned int, unsigned int); +void __cdecl _disable(void); +void __cdecl _enable(void); +void __cdecl _fxrstor(void const *); +void __cdecl _fxsave(void *); +long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value); +static __inline__ +long _InterlockedAnd(long volatile *_Value, long _Mask); +static __inline__ +short _InterlockedAnd16(short volatile *_Value, short _Mask); +static __inline__ +char _InterlockedAnd8(char volatile *_Value, char _Mask); +unsigned char _interlockedbittestandreset(long volatile *, long); +unsigned char _interlockedbittestandset(long volatile *, long); +static __inline__ +long __cdecl _InterlockedCompareExchange(long volatile *_Destination, + long _Exchange, long _Comparand); +long _InterlockedCompareExchange_HLEAcquire(long volatile *, long, long); +long _InterlockedCompareExchange_HLERelease(long 
volatile *, long, long);
+static __inline__
+short _InterlockedCompareExchange16(short volatile *_Destination,
+                                    short _Exchange, short _Comparand);
+static __inline__
+__int64 _InterlockedCompareExchange64(__int64 volatile *_Destination,
+                                      __int64 _Exchange, __int64 _Comparand);
+__int64 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *, __int64,
+                                                 __int64);
+__int64 _InterlockedCompareExchange64_HLERelease(__int64 volatile *, __int64,
+                                                 __int64);
+static __inline__
+char _InterlockedCompareExchange8(char volatile *_Destination, char _Exchange,
+                                  char _Comparand);
+void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,
+                                                    void *);
+void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,
+                                                    void *);
+static __inline__
+long __cdecl _InterlockedDecrement(long volatile *_Addend);
+static __inline__
+short _InterlockedDecrement16(short volatile *_Addend);
+static __inline__
+long __cdecl _InterlockedExchange(long volatile *_Target, long _Value);
+static __inline__
+short _InterlockedExchange16(short volatile *_Target, short _Value);
+static __inline__
+char _InterlockedExchange8(char volatile *_Target, char _Value);
+static __inline__
+long __cdecl _InterlockedExchangeAdd(long volatile *_Addend, long _Value);
+long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);
+long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
+static __inline__
+char _InterlockedExchangeAdd8(char volatile *_Addend, char _Value);
+static __inline__
+long __cdecl _InterlockedIncrement(long volatile *_Addend);
+static __inline__
+short _InterlockedIncrement16(short volatile *_Addend);
+static __inline__
+long _InterlockedOr(long volatile *_Value, long _Mask);
+static __inline__
+short _InterlockedOr16(short volatile *_Value, short _Mask);
+static __inline__
+char _InterlockedOr8(char volatile *_Value, char _Mask);
+static __inline__
+long _InterlockedXor(long volatile *_Value, long _Mask);
+static __inline__
+short
_InterlockedXor16(short volatile *_Value, short _Mask); +static __inline__ +char _InterlockedXor8(char volatile *_Value, char _Mask); +void __cdecl _invpcid(unsigned int, void *); +static __inline__ +unsigned long __cdecl _lrotl(unsigned long, int); +static __inline__ +unsigned long __cdecl _lrotr(unsigned long, int); +static __inline__ +unsigned int _lzcnt_u32(unsigned int); +static __inline__ +void _ReadBarrier(void); +static __inline__ +void _ReadWriteBarrier(void); +static __inline__ +void *_ReturnAddress(void); +unsigned int _rorx_u32(unsigned int, const unsigned int); +int __cdecl _rdrand16_step(unsigned short *); +int __cdecl _rdrand32_step(unsigned int *); +static __inline__ +unsigned int __cdecl _rotl(unsigned int _Value, int _Shift); +static __inline__ +unsigned short _rotl16(unsigned short _Value, unsigned char _Shift); +static __inline__ +unsigned __int64 __cdecl _rotl64(unsigned __int64 _Value, int _Shift); +static __inline__ +unsigned char _rotl8(unsigned char _Value, unsigned char _Shift); +static __inline__ +unsigned int __cdecl _rotr(unsigned int _Value, int _Shift); +static __inline__ +unsigned short _rotr16(unsigned short _Value, unsigned char _Shift); +static __inline__ +unsigned __int64 __cdecl _rotr64(unsigned __int64 _Value, int _Shift); +static __inline__ +unsigned char _rotr8(unsigned char _Value, unsigned char _Shift); +int _sarx_i32(int, unsigned int); + +/* FIXME: Need definition for jmp_buf. 
+   int __cdecl _setjmp(jmp_buf); */
+
+unsigned int _shlx_u32(unsigned int, unsigned int);
+unsigned int _shrx_u32(unsigned int, unsigned int);
+void _Store_HLERelease(long volatile *, long);
+void _Store64_HLERelease(__int64 volatile *, __int64);
+void _StorePointer_HLERelease(void *volatile *, void *);
+unsigned int _t1mskc_u32(unsigned int);
+unsigned int _tzcnt_u32(unsigned int);
+unsigned int _tzcnt_u32(unsigned int);
+unsigned int _tzmsk_u32(unsigned int);
+static __inline__
+void _WriteBarrier(void);
+void _xabort(const unsigned int imm);
+unsigned __int32 _xbegin(void);
+void _xend(void);
+unsigned __int64 __cdecl _xgetbv(unsigned int);
+void __cdecl _xrstor(void const *, unsigned __int64);
+void __cdecl _xsave(void *, unsigned __int64);
+void __cdecl _xsaveopt(void *, unsigned __int64);
+void __cdecl _xsetbv(unsigned int, unsigned __int64);
+unsigned char _xtest(void);
+
+/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
+#ifdef __x86_64__
+void __addgsbyte(unsigned long, unsigned char);
+void __addgsdword(unsigned long, unsigned long);
+void __addgsqword(unsigned long, unsigned __int64);
+void __addgsword(unsigned long, unsigned short);
+void __faststorefence(void);
+void __incgsbyte(unsigned long);
+void __incgsdword(unsigned long);
+void __incgsqword(unsigned long);
+void __incgsword(unsigned long);
+static __inline__ unsigned __int64 __popcnt64(unsigned __int64); /* static: matches its inline definition */
+unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
+                                unsigned __int64 _HighPart,
+                                unsigned char _Shift);
+unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
+                                 unsigned __int64 _HighPart,
+                                 unsigned char _Shift);
+void __stosq(unsigned __int64 *, unsigned __int64, size_t);
+unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64);
+unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int);
+unsigned __int64 _bextri_u64(unsigned __int64, unsigned int);
+static __inline__
+unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
+static __inline__
+unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
+static __inline__
+unsigned char _bittest64(__int64 const *, __int64);
+static __inline__
+unsigned char _bittestandcomplement64(__int64 *, __int64);
+static __inline__
+unsigned char _bittestandreset64(__int64 *, __int64);
+static __inline__
+unsigned char _bittestandset64(__int64 *, __int64);
+unsigned __int64 _blcfill_u64(unsigned __int64);
+unsigned __int64 _blci_u64(unsigned __int64);
+unsigned __int64 _blcic_u64(unsigned __int64);
+unsigned __int64 _blcmsk_u64(unsigned __int64);
+unsigned __int64 _blcs_u64(unsigned __int64);
+unsigned __int64 _blsfill_u64(unsigned __int64);
+unsigned __int64 _blsi_u64(unsigned __int64);
+unsigned __int64 _blsic_u64(unsigned __int64);
+unsigned __int64 _blsmsk_u64(unsigned __int64);
+unsigned __int64 _blsr_u64(unsigned __int64);
+unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
+unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int);
+void __cdecl _fxrstor64(void const *);
+void __cdecl _fxsave64(void *);
+long _InterlockedAnd_np(long volatile *_Value, long _Mask);
+short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
+__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
+char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
+unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
+unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
+long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
+                                    long _Comparand);
+unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
+                                             __int64 _ExchangeHigh,
+                                             __int64 _ExchangeLow,
+                                             __int64 *_ComparandResult);
+unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
+                                                __int64 _ExchangeHigh,
+                                                __int64 _ExchangeLow,
+                                                __int64 *_ComparandResult);
+short _InterlockedCompareExchange16_np(short volatile *_Destination,
+                                       short _Exchange, short
_Comparand);
+__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,
+                                         __int64 _Exchange, __int64 _Comparand);
+void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,
+                                            void *_Exchange, void *_Comparand);
+long _InterlockedOr_np(long volatile *_Value, long _Mask);
+short _InterlockedOr16_np(short volatile *_Value, short _Mask);
+__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);
+char _InterlockedOr8_np(char volatile *_Value, char _Mask);
+long _InterlockedXor_np(long volatile *_Value, long _Mask);
+short _InterlockedXor16_np(short volatile *_Value, short _Mask);
+__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);
+char _InterlockedXor8_np(char volatile *_Value, char _Mask);
+static __inline__ unsigned __int64 _lzcnt_u64(unsigned __int64); /* static: matches its inline definition */
+__int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand,
+                __int64 *_HighProduct);
+unsigned int __cdecl _readfsbase_u32(void);
+unsigned __int64 __cdecl _readfsbase_u64(void);
+unsigned int __cdecl _readgsbase_u32(void);
+unsigned __int64 __cdecl _readgsbase_u64(void);
+unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);
+unsigned __int64 _tzcnt_u64(unsigned __int64);
+unsigned __int64 _tzmsk_u64(unsigned __int64);
+unsigned __int64 _umul128(unsigned __int64 _Multiplier,
+                          unsigned __int64 _Multiplicand,
+                          unsigned __int64 *_HighProduct);
+void __cdecl _writefsbase_u32(unsigned int);
+void __cdecl _writefsbase_u64(unsigned __int64);
+void __cdecl _writegsbase_u32(unsigned int);
+void __cdecl _writegsbase_u64(unsigned __int64);
+void __cdecl _xrstor64(void const *, unsigned __int64);
+void __cdecl _xsave64(void *, unsigned __int64);
+void __cdecl _xsaveopt64(void *, unsigned __int64);
+
+#endif /* __x86_64__ */
+
+/*----------------------------------------------------------------------------*\
+|* Bit Twiddling
+\*----------------------------------------------------------------------------*/
+static __inline__ unsigned char
__attribute__((__always_inline__, __nodebug__)) +_rotl8(unsigned char _Value, unsigned char _Shift) { + _Shift &= 0x7; + return _Shift ? (_Value << _Shift) | (_Value >> (8 - _Shift)) : _Value; +} +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +_rotr8(unsigned char _Value, unsigned char _Shift) { + _Shift &= 0x7; + return _Shift ? (_Value >> _Shift) | (_Value << (8 - _Shift)) : _Value; +} +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +_rotl16(unsigned short _Value, unsigned char _Shift) { + _Shift &= 0xf; + return _Shift ? (_Value << _Shift) | (_Value >> (16 - _Shift)) : _Value; +} +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +_rotr16(unsigned short _Value, unsigned char _Shift) { + _Shift &= 0xf; + return _Shift ? (_Value >> _Shift) | (_Value << (16 - _Shift)) : _Value; +} +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_rotl(unsigned int _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; +} +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_rotr(unsigned int _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; +} +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) +_lrotl(unsigned long _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value << _Shift) | (_Value >> (32 - _Shift)) : _Value; +} +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) +_lrotr(unsigned long _Value, int _Shift) { + _Shift &= 0x1f; + return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value; +} +static +__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__)) +_rotl64(unsigned __int64 _Value, int _Shift) { + _Shift &= 0x3f; + return _Shift ? 
(_Value << _Shift) | (_Value >> (64 - _Shift)) : _Value;
+}
+static
+__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+_rotr64(unsigned __int64 _Value, int _Shift) {
+  _Shift &= 0x3f;
+  return _Shift ? (_Value >> _Shift) | (_Value << (64 - _Shift)) : _Value;
+}
+/*----------------------------------------------------------------------------*\
+|* Bit Counting and Testing (scans return 0 and leave *_Index alone if _Mask==0)
+\*----------------------------------------------------------------------------*/
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_BitScanForward(unsigned long *_Index, unsigned long _Mask) {
+  if (!_Mask)
+    return 0;
+  *_Index = __builtin_ctzl(_Mask);
+  return 1;
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_BitScanReverse(unsigned long *_Index, unsigned long _Mask) {
+  if (!_Mask)
+    return 0;
+  *_Index = (sizeof(_Mask) * __CHAR_BIT__ - 1) - __builtin_clzl(_Mask); /* top-bit index from the real width of long */
+  return 1;
+}
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+_lzcnt_u32(unsigned int a) {
+  if (!a)
+    return 32;
+  return __builtin_clz(a); /* int-width builtin: the argument is unsigned int, not long */
+}
+static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
+__popcnt16(unsigned short value) {
+  return __builtin_popcount((int)value);
+}
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+__popcnt(unsigned int value) {
+  return __builtin_popcount(value);
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_bittest(long const *a, long b) {
+  return (*a >> b) & 1;
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_bittestandcomplement(long *a, long b) {
+  unsigned char x = (*a >> b) & 1;
+  *a = *a ^ (1ul << b); /* unsigned mask: a signed shift into the sign bit is undefined */
+  return x;
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_bittestandreset(long *a, long b) {
+  unsigned char x = (*a >> b) & 1;
+  *a = *a & ~(1ul << b);
+  return x;
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_bittestandset(long *a, long b) {
+  unsigned char x = (*a >> b) & 1;
+  *a = *a | (1ul << b);
+  return x;
+}
+#ifdef __x86_64__
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask) {
+  if (!_Mask)
+    return 0;
+  *_Index = __builtin_ctzll(_Mask);
+  return 1;
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask) {
+  if (!_Mask)
+    return 0;
+  *_Index = 63 - __builtin_clzll(_Mask);
+  return 1;
+}
+static
+__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+_lzcnt_u64(unsigned __int64 a) {
+  if (!a)
+    return 64;
+  return __builtin_clzll(a);
+}
+static __inline__
+unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+ __popcnt64(unsigned __int64 value) {
+  return __builtin_popcountll(value);
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_bittest64(__int64 const *a, __int64 b) {
+  return (*a >> b) & 1;
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_bittestandcomplement64(__int64 *a, __int64 b) {
+  unsigned char x = (*a >> b) & 1;
+  *a = *a ^ (1ull << b); /* unsigned mask: bit 63 must not overflow a signed shift */
+  return x;
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_bittestandreset64(__int64 *a, __int64 b) {
+  unsigned char x = (*a >> b) & 1;
+  *a = *a & ~(1ull << b);
+  return x;
+}
+static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
+_bittestandset64(__int64 *a, __int64 b) {
+  unsigned char x = (*a >> b) & 1;
+  *a = *a | (1ull << b);
+  return x;
+}
+#endif
+/*----------------------------------------------------------------------------*\
+|* Interlocked Exchange Add
+\*----------------------------------------------------------------------------*/
+static __inline__ char __attribute__((__always_inline__, __nodebug__))
+_InterlockedExchangeAdd8(char
volatile *_Addend, char _Value) {
+  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
+}
+static __inline__ short __attribute__((__always_inline__, __nodebug__))
+_InterlockedExchangeAdd16(short volatile *_Addend, short _Value) {
+  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
+}
+static __inline__ long __attribute__((__always_inline__, __nodebug__))
+_InterlockedExchangeAdd(long volatile *_Addend, long _Value) {
+  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
+}
+#ifdef __x86_64__
+static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+_InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value) {
+  return __atomic_fetch_add(_Addend, _Value, __ATOMIC_SEQ_CST);
+}
+#endif
+/*----------------------------------------------------------------------------*\
+|* Interlocked Exchange Sub: seq-cst subtract, returns the value held before it
+\*----------------------------------------------------------------------------*/
+static __inline__ char __attribute__((__always_inline__, __nodebug__))
+_InterlockedExchangeSub8(char volatile *_Subend, char _Value) {
+  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
+}
+static __inline__ short __attribute__((__always_inline__, __nodebug__))
+_InterlockedExchangeSub16(short volatile *_Subend, short _Value) {
+  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
+}
+static __inline__ long __attribute__((__always_inline__, __nodebug__))
+_InterlockedExchangeSub(long volatile *_Subend, long _Value) {
+  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
+}
+#ifdef __x86_64__
+static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+_InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value) {
+  return __atomic_fetch_sub(_Subend, _Value, __ATOMIC_SEQ_CST);
+}
+#endif
+/*----------------------------------------------------------------------------*\
+|* Interlocked Increment: seq-cst add of 1, returns the incremented value
+\*----------------------------------------------------------------------------*/
+static __inline__ short __attribute__((__always_inline__, __nodebug__))
+_InterlockedIncrement16(short volatile *_Value) {
+  return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
+}
+static __inline__ long __attribute__((__always_inline__, __nodebug__))
+_InterlockedIncrement(long volatile *_Value) {
+  return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
+}
+#ifdef __x86_64__
+static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+_InterlockedIncrement64(__int64 volatile *_Value) {
+  return __atomic_add_fetch(_Value, 1, __ATOMIC_SEQ_CST);
+}
+#endif
+/*----------------------------------------------------------------------------*\
+|* Interlocked Decrement: seq-cst subtract of 1, returns the decremented value
+\*----------------------------------------------------------------------------*/
+static __inline__ short __attribute__((__always_inline__, __nodebug__))
+_InterlockedDecrement16(short volatile *_Value) {
+  return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
+}
+static __inline__ long __attribute__((__always_inline__, __nodebug__))
+_InterlockedDecrement(long volatile *_Value) {
+  return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
+}
+#ifdef __x86_64__
+static __inline__ __int64 __attribute__((__always_inline__, __nodebug__))
+_InterlockedDecrement64(__int64 volatile *_Value) {
+  return __atomic_sub_fetch(_Value, 1, __ATOMIC_SEQ_CST);
+}
+#endif
+/*----------------------------------------------------------------------------*\
+|* Interlocked And: seq-cst AND of _Mask into *_Value, returns the prior value
+\*----------------------------------------------------------------------------*/
+static __inline__ char __attribute__((__always_inline__, __nodebug__))
+_InterlockedAnd8(char volatile *_Value, char _Mask) {
+  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
+}
+static __inline__ short __attribute__((__always_inline__, __nodebug__))
+_InterlockedAnd16(short volatile *_Value, short _Mask) {
+  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
+}
+static __inline__ long __attribute__((__always_inline__, __nodebug__))
+_InterlockedAnd(long volatile *_Value, long _Mask) {
+  return __atomic_fetch_and(_Value, _Mask, __ATOMIC_SEQ_CST);
+}
+#ifdef __x86_64__
+static __inline__ __int64
__attribute__((__always_inline__, __nodebug__)) +_InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask) { + return __atomic_and_fetch(_Value, _Mask, 0); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Or +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedOr8(char volatile *_Value, char _Mask) { + return __atomic_or_fetch(_Value, _Mask, 0); +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedOr16(short volatile *_Value, short _Mask) { + return __atomic_or_fetch(_Value, _Mask, 0); +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedOr(long volatile *_Value, long _Mask) { + return __atomic_or_fetch(_Value, _Mask, 0); +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedOr64(__int64 volatile *_Value, __int64 _Mask) { + return __atomic_or_fetch(_Value, _Mask, 0); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Xor +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedXor8(char volatile *_Value, char _Mask) { + return __atomic_xor_fetch(_Value, _Mask, 0); +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedXor16(short volatile *_Value, short _Mask) { + return __atomic_xor_fetch(_Value, _Mask, 0); +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedXor(long volatile *_Value, long _Mask) { + return __atomic_xor_fetch(_Value, _Mask, 0); +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedXor64(__int64 volatile *_Value, __int64 _Mask) { + return 
__atomic_xor_fetch(_Value, _Mask, 0); +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Exchange +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchange8(char volatile *_Target, char _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 0); + return _Value; +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchange16(short volatile *_Target, short _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 0); + return _Value; +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchange(long volatile *_Target, long _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 0); + return _Value; +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedExchange64(__int64 volatile *_Target, __int64 _Value) { + __atomic_exchange(_Target, &_Value, &_Value, 0); + return _Value; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Interlocked Compare Exchange +\*----------------------------------------------------------------------------*/ +static __inline__ char __attribute__((__always_inline__, __nodebug__)) +_InterlockedCompareExchange8(char volatile *_Destination, + char _Exchange, char _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + return _Comparand; +} +static __inline__ short __attribute__((__always_inline__, __nodebug__)) +_InterlockedCompareExchange16(short volatile *_Destination, + short _Exchange, short _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + return _Comparand; +} +static __inline__ long __attribute__((__always_inline__, __nodebug__)) +_InterlockedCompareExchange(long volatile *_Destination, + long _Exchange, long 
_Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + return _Comparand; +} +#ifdef __x86_64__ +static __inline__ __int64 __attribute__((__always_inline__, __nodebug__)) +_InterlockedCompareExchange64(__int64 volatile *_Destination, + __int64 _Exchange, __int64 _Comparand) { + __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0, 0, 0); + return _Comparand; +} +#endif +/*----------------------------------------------------------------------------*\ +|* Barriers +\*----------------------------------------------------------------------------*/ +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +_ReadWriteBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +_ReadBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__attribute__((deprecated("use other intrinsics or C++11 atomics instead"))) +_WriteBarrier(void) { + __asm__ volatile ("" : : : "memory"); +} +/*----------------------------------------------------------------------------*\ +|* Misc +\*----------------------------------------------------------------------------*/ +static __inline__ void * __attribute__((__always_inline__, __nodebug__)) +_AddressOfReturnAddress(void) { + return (void*)((char*)__builtin_frame_address(0) + sizeof(void*)); +} +static __inline__ void * __attribute__((__always_inline__, __nodebug__)) +_ReturnAddress(void) { + return __builtin_return_address(0); +} + +#ifdef __cplusplus +} +#endif + +#endif /* __INTRIN_H */ +#endif /* _MSC_VER */ diff --git a/python/clang/5.1/include/__wmmintrin_aes.h b/python/clang/5.1/include/__wmmintrin_aes.h new file mode 100644 index 00000000..2bfa027e --- /dev/null +++ 
b/python/clang/5.1/include/__wmmintrin_aes.h @@ -0,0 +1,67 @@ +/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ +#ifndef _WMMINTRIN_AES_H +#define _WMMINTRIN_AES_H + +#include + +#if !defined (__AES__) +# error "AES instructions not enabled" +#else + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesenc_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesenc128(__V, __R); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesenclast_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesenclast128(__V, __R); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesdec_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesdec128(__V, __R); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesdeclast_si128(__m128i __V, __m128i __R) +{ + return (__m128i)__builtin_ia32_aesdeclast128(__V, __R); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_aesimc_si128(__m128i __V) +{ + return (__m128i)__builtin_ia32_aesimc128(__V); +} + +#define _mm_aeskeygenassist_si128(C, R) \ + __builtin_ia32_aeskeygenassist128((C), (R)) + +#endif + +#endif /* _WMMINTRIN_AES_H */ diff --git a/python/clang/5.1/include/__wmmintrin_pclmul.h b/python/clang/5.1/include/__wmmintrin_pclmul.h new file mode 100644 index 00000000..8d1f1b7c --- /dev/null +++ b/python/clang/5.1/include/__wmmintrin_pclmul.h @@ -0,0 +1,34 @@ +/*===---- __wmmintrin_pclmul.h - AES intrinsics ----------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to 
do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef _WMMINTRIN_PCLMUL_H +#define _WMMINTRIN_PCLMUL_H + +#if !defined (__PCLMUL__) +# error "PCLMUL instruction is not enabled" +#else +#define _mm_clmulepi64_si128(__X, __Y, __I) \ + ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \ + (__v2di)(__m128i)(__Y), (char)(__I))) +#endif + +#endif /* _WMMINTRIN_PCLMUL_H */ diff --git a/python/clang/5.1/include/aarch64_simd.h b/python/clang/5.1/include/aarch64_simd.h new file mode 100644 index 00000000..f072d7d7 --- /dev/null +++ b/python/clang/5.1/include/aarch64_simd.h @@ -0,0 +1,7377 @@ +/*===---- aarch64_simd.h - ARM64 SIMD intrinsics ---------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __AARCH64_SIMD_H +#define __AARCH64_SIMD_H + +#if defined(__aarch64__) && !defined(__arm64__) +#include "arm_neon.h" +#else +#ifndef __AARCH64_SIMD__ +#error "AdvSIMD support not enabled" +#endif + +#include + +typedef uint16_t float16_t; +typedef float float32_t; +typedef double float64_t; +typedef uint8_t poly8_t; +typedef uint16_t poly16_t; +typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; +typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; +typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; +typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; +typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t; +typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; +typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t; +typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; +typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; +typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; +typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; +typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; +typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; +typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; +typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; +typedef __attribute__((neon_vector_type(2))) uint64_t 
uint64x2_t; +typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; +typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; +typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; +typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; +typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t; +typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t; +typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; +typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; +typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t; +typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; + +typedef struct int8x8x2_t { + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t { + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t { + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t { + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t { + int32x2_t val[2]; +} int32x2x2_t; + +typedef struct int32x4x2_t { + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t { + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t { + int64x2_t val[2]; +} int64x2x2_t; + +typedef struct uint8x8x2_t { + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t { + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t { + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t { + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t { + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct uint32x4x2_t { + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t { + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t { + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float16x4x2_t { + float16x4_t val[2]; +} float16x4x2_t; + +typedef struct float16x8x2_t { + float16x8_t val[2]; +} float16x8x2_t; + +typedef struct float32x2x2_t { + 
float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t { + float32x4_t val[2]; +} float32x4x2_t; + +typedef struct float64x1x2_t { + float64x1_t val[2]; +} float64x1x2_t; + +typedef struct float64x2x2_t { + float64x2_t val[2]; +} float64x2x2_t; + +typedef struct poly8x8x2_t { + poly8x8_t val[2]; +} poly8x8x2_t; + +typedef struct poly8x16x2_t { + poly8x16_t val[2]; +} poly8x16x2_t; + +typedef struct poly16x4x2_t { + poly16x4_t val[2]; +} poly16x4x2_t; + +typedef struct poly16x8x2_t { + poly16x8_t val[2]; +} poly16x8x2_t; + +typedef struct int8x8x3_t { + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t { + int8x16_t val[3]; +} int8x16x3_t; + +typedef struct int16x4x3_t { + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t { + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t { + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t { + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t { + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t { + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t { + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t { + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t { + uint16x4_t val[3]; +} uint16x4x3_t; + +typedef struct uint16x8x3_t { + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t { + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t { + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t { + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t { + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float16x4x3_t { + float16x4_t val[3]; +} float16x4x3_t; + +typedef struct float16x8x3_t { + float16x8_t val[3]; +} float16x8x3_t; + +typedef struct float32x2x3_t { + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t { + float32x4_t val[3]; +} float32x4x3_t; + +typedef struct float64x1x3_t { + float64x1_t val[3]; 
+} float64x1x3_t; + +typedef struct float64x2x3_t { + float64x2_t val[3]; +} float64x2x3_t; + +typedef struct poly8x8x3_t { + poly8x8_t val[3]; +} poly8x8x3_t; + +typedef struct poly8x16x3_t { + poly8x16_t val[3]; +} poly8x16x3_t; + +typedef struct poly16x4x3_t { + poly16x4_t val[3]; +} poly16x4x3_t; + +typedef struct poly16x8x3_t { + poly16x8_t val[3]; +} poly16x8x3_t; + +typedef struct int8x8x4_t { + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t { + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t { + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t { + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t { + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t { + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t { + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t { + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t { + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t { + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t { + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t { + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t { + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t { + uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t { + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef struct uint64x2x4_t { + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float16x4x4_t { + float16x4_t val[4]; +} float16x4x4_t; + +typedef struct float16x8x4_t { + float16x8_t val[4]; +} float16x8x4_t; + +typedef struct float32x2x4_t { + float32x2_t val[4]; +} float32x2x4_t; + +typedef struct float32x4x4_t { + float32x4_t val[4]; +} float32x4x4_t; + +typedef struct float64x1x4_t { + float64x1_t val[4]; +} float64x1x4_t; + +typedef struct float64x2x4_t { + float64x2_t val[4]; +} float64x2x4_t; + +typedef struct poly8x8x4_t { + poly8x8_t val[4]; +} poly8x8x4_t; + 
+typedef struct poly8x16x4_t { /* aggregate of four poly8x16_t vectors, reached through val[0..3] */
+  poly8x16_t val[4];
+} poly8x16x4_t;
+/* Aggregate of four poly16x4_t vectors. */
+typedef struct poly16x4x4_t {
+  poly16x4_t val[4];
+} poly16x4x4_t;
+/* Aggregate of four poly16x8_t vectors. */
+typedef struct poly16x8x4_t {
+  poly16x8_t val[4];
+} poly16x8x4_t;
+/* __ai: declaration prefix shared by every intrinsic wrapper that follows (static inline, always inlined, no debug info). */
+#define __ai static inline __attribute__((__always_inline__, __nodebug__))
+/* vget_high_*: return the upper half of the lanes of __a. Both shuffle operands are __a and the index list names only the top N/2 lanes. */
+__ai int8x8_t vget_high_s8(int8x16_t __a) {
+  return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); }
+__ai int16x4_t vget_high_s16(int16x8_t __a) {
+  return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); }
+__ai int32x2_t vget_high_s32(int32x4_t __a) {
+  return __builtin_shufflevector(__a, __a, 2, 3); }
+__ai int64x1_t vget_high_s64(int64x2_t __a) {
+  return __builtin_shufflevector(__a, __a, 1); }
+__ai float16x4_t vget_high_f16(float16x8_t __a) {
+  return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); }
+__ai float32x2_t vget_high_f32(float32x4_t __a) {
+  return __builtin_shufflevector(__a, __a, 2, 3); }
+__ai float64x1_t vget_high_f64(float64x2_t __a) {
+  return __builtin_shufflevector(__a, __a, 1); }
+__ai uint8x8_t vget_high_u8(uint8x16_t __a) {
+  return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); }
+__ai uint16x4_t vget_high_u16(uint16x8_t __a) {
+  return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); }
+__ai uint32x2_t vget_high_u32(uint32x4_t __a) {
+  return __builtin_shufflevector(__a, __a, 2, 3); }
+__ai uint64x1_t vget_high_u64(uint64x2_t __a) {
+  return __builtin_shufflevector(__a, __a, 1); }
+__ai poly8x8_t vget_high_p8(poly8x16_t __a) {
+  return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); }
+__ai poly16x4_t vget_high_p16(poly16x8_t __a) {
+  return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); }
+/* vmovl_*: forward to __builtin_arm64_vmovl_v; per the signatures the result keeps the lane count with lanes twice as wide. The trailing integer is presumably a type code selecting the variant -- TODO confirm. */
+__ai int16x8_t vmovl_s8(int8x8_t __a) {
+  return (int16x8_t)__builtin_arm64_vmovl_v(__a, 33); }
+__ai int32x4_t vmovl_s16(int16x4_t __a) {
+  return (int32x4_t)__builtin_arm64_vmovl_v((int8x8_t)__a, 34); }
+__ai int64x2_t vmovl_s32(int32x2_t __a) {
+  return (int64x2_t)__builtin_arm64_vmovl_v((int8x8_t)__a, 35); }
+__ai uint16x8_t vmovl_u8(uint8x8_t __a) { + return (uint16x8_t)__builtin_arm64_vmovl_v((int8x8_t)__a, 49); } +__ai uint32x4_t vmovl_u16(uint16x4_t __a) { + return (uint32x4_t)__builtin_arm64_vmovl_v((int8x8_t)__a, 50); } +__ai uint64x2_t vmovl_u32(uint32x2_t __a) { + return (uint64x2_t)__builtin_arm64_vmovl_v((int8x8_t)__a, 51); } + +__ai int16x8_t vmovl_high_s8(int8x16_t __a) { + return vmovl_s8(vget_high_s8(__a)); } +__ai int32x4_t vmovl_high_s16(int16x8_t __a) { + return vmovl_s16(vget_high_s16(__a)); } +__ai int64x2_t vmovl_high_s32(int32x4_t __a) { + return vmovl_s32(vget_high_s32(__a)); } +__ai uint16x8_t vmovl_high_u8(uint8x16_t __a) { + return vmovl_u8(vget_high_u8(__a)); } +__ai uint32x4_t vmovl_high_u16(uint16x8_t __a) { + return vmovl_u16(vget_high_u16(__a)); } +__ai uint64x2_t vmovl_high_u32(uint32x4_t __a) { + return vmovl_u32(vget_high_u32(__a)); } + +__ai int16x8_t vmull_s8(int8x8_t __a, int8x8_t __b) { + return (int16x8_t)__builtin_arm64_vmull_v(__a, __b, 33); } +__ai int32x4_t vmull_s16(int16x4_t __a, int16x4_t __b) { + return (int32x4_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)__b, 34); } +__ai int64x2_t vmull_s32(int32x2_t __a, int32x2_t __b) { + return (int64x2_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)__b, 35); } +__ai uint16x8_t vmull_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint16x8_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)__b, 49); } +__ai uint32x4_t vmull_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint32x4_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)__b, 50); } +__ai uint64x2_t vmull_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint64x2_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)__b, 51); } +__ai poly16x8_t vmull_p8(poly8x8_t __a, poly8x8_t __b) { + return (poly16x8_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)__b, 37); } + +__ai int16x8_t vmull_high_s8(int8x16_t __a, int8x16_t __b) { + return vmull_s8(vget_high_s8(__a), vget_high_s8(__b)); } +__ai int32x4_t 
vmull_high_s16(int16x8_t __a, int16x8_t __b) { + return vmull_s16(vget_high_s16(__a), vget_high_s16(__b)); } +__ai int64x2_t vmull_high_s32(int32x4_t __a, int32x4_t __b) { + return vmull_s32(vget_high_s32(__a), vget_high_s32(__b)); } +__ai uint16x8_t vmull_high_u8(uint8x16_t __a, uint8x16_t __b) { + return vmull_u8(vget_high_u8(__a), vget_high_u8(__b)); } +__ai uint32x4_t vmull_high_u16(uint16x8_t __a, uint16x8_t __b) { + return vmull_u16(vget_high_u16(__a), vget_high_u16(__b)); } +__ai uint64x2_t vmull_high_u32(uint32x4_t __a, uint32x4_t __b) { + return vmull_u32(vget_high_u32(__a), vget_high_u32(__b)); } + +__ai int8x8_t vabd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vabd_v(__a, __b, 0); } +__ai int16x4_t vabd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vabd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vabd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vabd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vabd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vabd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vabd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vabd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vabd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vabd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vabd_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vabd_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vabdq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vabdq_v(__a, __b, 32); } +__ai int16x8_t vabdq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vabdq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vabdq_u8(uint8x16_t 
__a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vabdq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vabdq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vabdq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vabdq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int32x4_t vqdmull_s16(int16x4_t __a, int16x4_t __b) { + return (int32x4_t)__builtin_arm64_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 34); } +__ai int64x2_t vqdmull_s32(int32x2_t __a, int32x2_t __b) { + return (int64x2_t)__builtin_arm64_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 35); } + +__ai int32_t vqdmullh_s16(int16_t __a, int16_t __b) { + return (int32_t)__builtin_arm64_vqdmullh_s16(__a, __b); } + +__ai int64_t vqdmulls_s32(int32_t __a, int32_t __b) { + return (int64_t)__builtin_arm64_vqdmulls_s32(__a, __b); } + +__ai int8x8_t vqsub_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vqsub_v(__a, __b, 0); } +__ai int16x4_t vqsub_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqsub_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vqsub_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_arm64_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vqsub_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vqsub_u16(uint16x4_t __a, uint16x4_t __b) { + return 
(uint16x4_t)__builtin_arm64_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vqsub_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vqsub_u64(uint64x1_t __a, uint64x1_t __b) { + return (uint64x1_t)__builtin_arm64_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int8x16_t vqsubq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vqsubq_v(__a, __b, 32); } +__ai int16x8_t vqsubq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqsubq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vqsubq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vqsubq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vqsubq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqsubq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vqsubq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int32_t vqsubs_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_arm64_vqsubs_s32(__a, __b); } +__ai uint32_t vqsubs_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_arm64_vqsubs_u32(__a, __b); } + +__ai int64_t vqsubd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vqsubd_s64(__a, __b); } +__ai uint64_t vqsubd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vqsubd_u64(__a, __b); } + +__ai int16_t vaddlv_s8(int8x8_t __a) { + 
return (int16_t)__builtin_arm64_vaddlv_s8(__a); } +__ai int32_t vaddlv_s16(int16x4_t __a) { + return (int32_t)__builtin_arm64_vaddlv_s16(__a); } +__ai int64_t vaddlv_s32(int32x2_t __a) { + return (int64_t)__builtin_arm64_vaddlv_s32(__a); } +__ai uint16_t vaddlv_u8(uint8x8_t __a) { + return (uint16_t)__builtin_arm64_vaddlv_u8((int8x8_t)__a); } +__ai uint32_t vaddlv_u16(uint16x4_t __a) { + return (uint32_t)__builtin_arm64_vaddlv_u16((int16x4_t)__a); } +__ai uint64_t vaddlv_u32(uint32x2_t __a) { + return (uint64_t)__builtin_arm64_vaddlv_u32((int32x2_t)__a); } +__ai int16_t vaddlvq_s8(int8x16_t __a) { + return (int16_t)__builtin_arm64_vaddlvq_s8(__a); } +__ai int32_t vaddlvq_s16(int16x8_t __a) { + return (int32_t)__builtin_arm64_vaddlvq_s16(__a); } +__ai int64_t vaddlvq_s32(int32x4_t __a) { + return (int64_t)__builtin_arm64_vaddlvq_s32(__a); } +__ai uint16_t vaddlvq_u8(uint8x16_t __a) { + return (uint16_t)__builtin_arm64_vaddlvq_u8((int8x16_t)__a); } +__ai uint32_t vaddlvq_u16(uint16x8_t __a) { + return (uint32_t)__builtin_arm64_vaddlvq_u16((int16x8_t)__a); } +__ai uint64_t vaddlvq_u32(uint32x4_t __a) { + return (uint64_t)__builtin_arm64_vaddlvq_u32((int32x4_t)__a); } + +__ai int8_t vaddv_s8(int8x8_t __a) { + return (int8_t)__builtin_arm64_vaddv_s8(__a); } +__ai int16_t vaddv_s16(int16x4_t __a) { + return (int16_t)__builtin_arm64_vaddv_s16(__a); } +__ai int32_t vaddv_s32(int32x2_t __a) { + return (int32_t)__builtin_arm64_vaddv_s32(__a); } +__ai uint8_t vaddv_u8(uint8x8_t __a) { + return (uint8_t)__builtin_arm64_vaddv_u8((int8x8_t)__a); } +__ai uint16_t vaddv_u16(uint16x4_t __a) { + return (uint16_t)__builtin_arm64_vaddv_u16((int16x4_t)__a); } +__ai uint32_t vaddv_u32(uint32x2_t __a) { + return (uint32_t)__builtin_arm64_vaddv_u32((int32x2_t)__a); } +__ai int8_t vaddvq_s8(int8x16_t __a) { + return (int8_t)__builtin_arm64_vaddvq_s8(__a); } +__ai int16_t vaddvq_s16(int16x8_t __a) { + return (int16_t)__builtin_arm64_vaddvq_s16(__a); } +__ai int32_t vaddvq_s32(int32x4_t __a) 
{ + return (int32_t)__builtin_arm64_vaddvq_s32(__a); } +__ai uint8_t vaddvq_u8(uint8x16_t __a) { + return (uint8_t)__builtin_arm64_vaddvq_u8((int8x16_t)__a); } +__ai uint16_t vaddvq_u16(uint16x8_t __a) { + return (uint16_t)__builtin_arm64_vaddvq_u16((int16x8_t)__a); } +__ai uint32_t vaddvq_u32(uint32x4_t __a) { + return (uint32_t)__builtin_arm64_vaddvq_u32((int32x4_t)__a); } + +__ai float32_t vmaxnmv_f32(float32x2_t __a) { + return (float32_t)__builtin_arm64_vmaxnmv_f32(__a); } +__ai float32_t vmaxnmvq_f32(float32x4_t __a) { + return (float32_t)__builtin_arm64_vmaxnmvq_f32(__a); } + +__ai int8_t vmaxv_s8(int8x8_t __a) { + return (int8_t)__builtin_arm64_vmaxv_s8(__a); } +__ai int16_t vmaxv_s16(int16x4_t __a) { + return (int16_t)__builtin_arm64_vmaxv_s16(__a); } +__ai int32_t vmaxv_s32(int32x2_t __a) { + return (int32_t)__builtin_arm64_vmaxv_s32(__a); } +__ai float32_t vmaxv_f32(float32x2_t __a) { + return (float32_t)__builtin_arm64_vmaxv_f32(__a); } +__ai uint8_t vmaxv_u8(uint8x8_t __a) { + return (uint8_t)__builtin_arm64_vmaxv_u8((int8x8_t)__a); } +__ai uint16_t vmaxv_u16(uint16x4_t __a) { + return (uint16_t)__builtin_arm64_vmaxv_u16((int16x4_t)__a); } +__ai uint32_t vmaxv_u32(uint32x2_t __a) { + return (uint32_t)__builtin_arm64_vmaxv_u32((int32x2_t)__a); } +__ai int8_t vmaxvq_s8(int8x16_t __a) { + return (int8_t)__builtin_arm64_vmaxvq_s8(__a); } +__ai int16_t vmaxvq_s16(int16x8_t __a) { + return (int16_t)__builtin_arm64_vmaxvq_s16(__a); } +__ai int32_t vmaxvq_s32(int32x4_t __a) { + return (int32_t)__builtin_arm64_vmaxvq_s32(__a); } +__ai uint8_t vmaxvq_u8(uint8x16_t __a) { + return (uint8_t)__builtin_arm64_vmaxvq_u8((int8x16_t)__a); } +__ai uint16_t vmaxvq_u16(uint16x8_t __a) { + return (uint16_t)__builtin_arm64_vmaxvq_u16((int16x8_t)__a); } +__ai uint32_t vmaxvq_u32(uint32x4_t __a) { + return (uint32_t)__builtin_arm64_vmaxvq_u32((int32x4_t)__a); } +__ai float32_t vmaxvq_f32(float32x4_t __a) { + return (float32_t)__builtin_arm64_vmaxvq_f32(__a); } +__ai float64_t 
vmaxvq_f64(float64x2_t __a) { + return (float64_t)__builtin_arm64_vmaxvq_f64(__a); } + +__ai float32_t vminnmv_f32(float32x2_t __a) { + return (float32_t)__builtin_arm64_vminnmv_f32(__a); } +__ai float32_t vminnmvq_f32(float32x4_t __a) { + return (float32_t)__builtin_arm64_vminnmvq_f32(__a); } + +__ai int8_t vminv_s8(int8x8_t __a) { + return (int8_t)__builtin_arm64_vminv_s8(__a); } +__ai int16_t vminv_s16(int16x4_t __a) { + return (int16_t)__builtin_arm64_vminv_s16(__a); } +__ai int32_t vminv_s32(int32x2_t __a) { + return (int32_t)__builtin_arm64_vminv_s32(__a); } +__ai float32_t vminv_f32(float32x2_t __a) { + return (float32_t)__builtin_arm64_vminv_f32(__a); } +__ai uint8_t vminv_u8(uint8x8_t __a) { + return (uint8_t)__builtin_arm64_vminv_u8((int8x8_t)__a); } +__ai uint16_t vminv_u16(uint16x4_t __a) { + return (uint16_t)__builtin_arm64_vminv_u16((int16x4_t)__a); } +__ai uint32_t vminv_u32(uint32x2_t __a) { + return (uint32_t)__builtin_arm64_vminv_u32((int32x2_t)__a); } +__ai int8_t vminvq_s8(int8x16_t __a) { + return (int8_t)__builtin_arm64_vminvq_s8(__a); } +__ai int16_t vminvq_s16(int16x8_t __a) { + return (int16_t)__builtin_arm64_vminvq_s16(__a); } +__ai int32_t vminvq_s32(int32x4_t __a) { + return (int32_t)__builtin_arm64_vminvq_s32(__a); } +__ai uint8_t vminvq_u8(uint8x16_t __a) { + return (uint8_t)__builtin_arm64_vminvq_u8((int8x16_t)__a); } +__ai uint16_t vminvq_u16(uint16x8_t __a) { + return (uint16_t)__builtin_arm64_vminvq_u16((int16x8_t)__a); } +__ai uint32_t vminvq_u32(uint32x4_t __a) { + return (uint32_t)__builtin_arm64_vminvq_u32((int32x4_t)__a); } +__ai float32_t vminvq_f32(float32x4_t __a) { + return (float32_t)__builtin_arm64_vminvq_f32(__a); } +__ai float64_t vminvq_f64(float64x2_t __a) { + return (float64_t)__builtin_arm64_vminvq_f64(__a); } + +__ai int8x8_t vaba_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a + vabd_s8(__b, __c); } +__ai int16x4_t vaba_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a + vabd_s16(__b, 
__c); } +__ai int32x2_t vaba_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a + vabd_s32(__b, __c); } +__ai uint8x8_t vaba_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a + vabd_u8(__b, __c); } +__ai uint16x4_t vaba_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a + vabd_u16(__b, __c); } +__ai uint32x2_t vaba_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a + vabd_u32(__b, __c); } +__ai int8x16_t vabaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { + return __a + vabdq_s8(__b, __c); } +__ai int16x8_t vabaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { + return __a + vabdq_s16(__b, __c); } +__ai int32x4_t vabaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { + return __a + vabdq_s32(__b, __c); } +__ai uint8x16_t vabaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return __a + vabdq_u8(__b, __c); } +__ai uint16x8_t vabaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return __a + vabdq_u16(__b, __c); } +__ai uint32x4_t vabaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return __a + vabdq_u32(__b, __c); } + +__ai int16x8_t vabal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { + return (int16x8_t)__builtin_arm64_vabal_v((int8x16_t)__a, __b, __c, 33); } +__ai int32x4_t vabal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return (int32x4_t)__builtin_arm64_vabal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); } +__ai int64x2_t vabal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return (int64x2_t)__builtin_arm64_vabal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); } +__ai uint16x8_t vabal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return (uint16x8_t)__builtin_arm64_vabal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 49); } +__ai uint32x4_t vabal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return (uint32x4_t)__builtin_arm64_vabal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 50); } +__ai uint64x2_t vabal_u32(uint64x2_t 
__a, uint32x2_t __b, uint32x2_t __c) { + return (uint64x2_t)__builtin_arm64_vabal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 51); } + +__ai int16x8_t vabal_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) { + return (int16x8_t)__builtin_arm64_vabal_high_v((int8x16_t)__a, __b, __c, 33); } +__ai int32x4_t vabal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return (int32x4_t)__builtin_arm64_vabal_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } +__ai int64x2_t vabal_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return (int64x2_t)__builtin_arm64_vabal_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 35); } +__ai uint16x8_t vabal_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) { + return (uint16x8_t)__builtin_arm64_vabal_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); } +__ai uint32x4_t vabal_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { + return (uint32x4_t)__builtin_arm64_vabal_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } +__ai uint64x2_t vabal_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint64x2_t)__builtin_arm64_vabal_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 51); } + +__ai float64_t vabdd_f64(float64_t __a, float64_t __b) { + return (float64_t)__builtin_arm64_vabdd_f64(__a, __b); } + +__ai int16x8_t vabdl_s8(int8x8_t __a, int8x8_t __b) { + return (int16x8_t)__builtin_arm64_vabdl_v(__a, __b, 33); } +__ai int32x4_t vabdl_s16(int16x4_t __a, int16x4_t __b) { + return (int32x4_t)__builtin_arm64_vabdl_v((int8x8_t)__a, (int8x8_t)__b, 34); } +__ai int64x2_t vabdl_s32(int32x2_t __a, int32x2_t __b) { + return (int64x2_t)__builtin_arm64_vabdl_v((int8x8_t)__a, (int8x8_t)__b, 35); } +__ai uint16x8_t vabdl_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint16x8_t)__builtin_arm64_vabdl_v((int8x8_t)__a, (int8x8_t)__b, 49); } +__ai uint32x4_t vabdl_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint32x4_t)__builtin_arm64_vabdl_v((int8x8_t)__a, 
(int8x8_t)__b, 50); } +__ai uint64x2_t vabdl_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint64x2_t)__builtin_arm64_vabdl_v((int8x8_t)__a, (int8x8_t)__b, 51); } + +__ai int16x8_t vabdl_high_s8(int8x16_t __a, int8x16_t __b) { + return (int16x8_t)__builtin_arm64_vabdl_high_v(__a, __b, 33); } +__ai int32x4_t vabdl_high_s16(int16x8_t __a, int16x8_t __b) { + return (int32x4_t)__builtin_arm64_vabdl_high_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vabdl_high_s32(int32x4_t __a, int32x4_t __b) { + return (int64x2_t)__builtin_arm64_vabdl_high_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint16x8_t vabdl_high_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint16x8_t)__builtin_arm64_vabdl_high_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vabdl_high_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint32x4_t)__builtin_arm64_vabdl_high_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vabdl_high_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint64x2_t)__builtin_arm64_vabdl_high_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai float32_t vabds_f32(float32_t __a, float32_t __b) { + return (float32_t)__builtin_arm64_vabds_f32(__a, __b); } + +__ai int8x8_t vabs_s8(int8x8_t __a) { + return (int8x8_t)__builtin_arm64_vabs_v(__a, 0); } +__ai int16x4_t vabs_s16(int16x4_t __a) { + return (int16x4_t)__builtin_arm64_vabs_v((int8x8_t)__a, 1); } +__ai int32x2_t vabs_s32(int32x2_t __a) { + return (int32x2_t)__builtin_arm64_vabs_v((int8x8_t)__a, 2); } +__ai float32x2_t vabs_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vabs_v((int8x8_t)__a, 7); } +__ai int8x16_t vabsq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_arm64_vabsq_v(__a, 32); } +__ai int16x8_t vabsq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_arm64_vabsq_v((int8x16_t)__a, 33); } +__ai int32x4_t vabsq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_arm64_vabsq_v((int8x16_t)__a, 34); } +__ai int64x2_t vabsq_s64(int64x2_t __a) { + return 
(int64x2_t)__builtin_arm64_vabsq_v((int8x16_t)__a, 35); } +__ai float32x4_t vabsq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vabsq_v((int8x16_t)__a, 39); } +__ai float64x2_t vabsq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vabsq_v((int8x16_t)__a, 40); } + +__ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) { + return __a + __b; } +__ai int16x4_t vadd_s16(int16x4_t __a, int16x4_t __b) { + return __a + __b; } +__ai int32x2_t vadd_s32(int32x2_t __a, int32x2_t __b) { + return __a + __b; } +__ai int64x1_t vadd_s64(int64x1_t __a, int64x1_t __b) { + return __a + __b; } +__ai float32x2_t vadd_f32(float32x2_t __a, float32x2_t __b) { + return __a + __b; } +__ai uint8x8_t vadd_u8(uint8x8_t __a, uint8x8_t __b) { + return __a + __b; } +__ai uint16x4_t vadd_u16(uint16x4_t __a, uint16x4_t __b) { + return __a + __b; } +__ai uint32x2_t vadd_u32(uint32x2_t __a, uint32x2_t __b) { + return __a + __b; } +__ai uint64x1_t vadd_u64(uint64x1_t __a, uint64x1_t __b) { + return __a + __b; } +__ai int8x16_t vaddq_s8(int8x16_t __a, int8x16_t __b) { + return __a + __b; } +__ai int16x8_t vaddq_s16(int16x8_t __a, int16x8_t __b) { + return __a + __b; } +__ai int32x4_t vaddq_s32(int32x4_t __a, int32x4_t __b) { + return __a + __b; } +__ai int64x2_t vaddq_s64(int64x2_t __a, int64x2_t __b) { + return __a + __b; } +__ai float32x4_t vaddq_f32(float32x4_t __a, float32x4_t __b) { + return __a + __b; } +__ai float64x2_t vaddq_f64(float64x2_t __a, float64x2_t __b) { + return __a + __b; } +__ai uint8x16_t vaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a + __b; } +__ai uint16x8_t vaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a + __b; } +__ai uint32x4_t vaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a + __b; } +__ai uint64x2_t vaddq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a + __b; } + +__ai int64_t vaddd_s64(int64_t __a, int64_t __b) { + return __a + __b; } +__ai uint64_t vaddd_u64(uint64_t __a, uint64_t __b) { + return __a + __b; } + +__ai 
int8x8_t vaddhn_s16(int16x8_t __a, int16x8_t __b) { + return (int8x8_t)__builtin_arm64_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } +__ai int16x4_t vaddhn_s32(int32x4_t __a, int32x4_t __b) { + return (int16x4_t)__builtin_arm64_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } +__ai int32x2_t vaddhn_s64(int64x2_t __a, int64x2_t __b) { + return (int32x2_t)__builtin_arm64_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } +__ai uint8x8_t vaddhn_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint8x8_t)__builtin_arm64_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } +__ai uint16x4_t vaddhn_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint16x4_t)__builtin_arm64_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } +__ai uint32x2_t vaddhn_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint32x2_t)__builtin_arm64_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } + +__ai int8x16_t vaddhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) { + return (int8x16_t)__builtin_arm64_vaddhn_high_v(__a, (int8x16_t)__b, (int8x16_t)__c, 32); } +__ai int16x8_t vaddhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) { + return (int16x8_t)__builtin_arm64_vaddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 33); } +__ai int32x4_t vaddhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) { + return (int32x4_t)__builtin_arm64_vaddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } +__ai uint8x16_t vaddhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return (uint8x16_t)__builtin_arm64_vaddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); } +__ai uint16x8_t vaddhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint16x8_t)__builtin_arm64_vaddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); } +__ai uint32x4_t vaddhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return (uint32x4_t)__builtin_arm64_vaddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai int16x8_t vaddl_s8(int8x8_t 
__a, int8x8_t __b) { + return vmovl_s8(__a) + vmovl_s8(__b); } +__ai int32x4_t vaddl_s16(int16x4_t __a, int16x4_t __b) { + return vmovl_s16(__a) + vmovl_s16(__b); } +__ai int64x2_t vaddl_s32(int32x2_t __a, int32x2_t __b) { + return vmovl_s32(__a) + vmovl_s32(__b); } +__ai uint16x8_t vaddl_u8(uint8x8_t __a, uint8x8_t __b) { + return vmovl_u8(__a) + vmovl_u8(__b); } +__ai uint32x4_t vaddl_u16(uint16x4_t __a, uint16x4_t __b) { + return vmovl_u16(__a) + vmovl_u16(__b); } +__ai uint64x2_t vaddl_u32(uint32x2_t __a, uint32x2_t __b) { + return vmovl_u32(__a) + vmovl_u32(__b); } + +__ai int16x8_t vaddl_high_s8(int8x16_t __a, int8x16_t __b) { + return vmovl_high_s8(__a) + vmovl_high_s8(__b); } +__ai int32x4_t vaddl_high_s16(int16x8_t __a, int16x8_t __b) { + return vmovl_high_s16(__a) + vmovl_high_s16(__b); } +__ai int64x2_t vaddl_high_s32(int32x4_t __a, int32x4_t __b) { + return vmovl_high_s32(__a) + vmovl_high_s32(__b); } +__ai uint16x8_t vaddl_high_u8(uint8x16_t __a, uint8x16_t __b) { + return vmovl_high_u8(__a) + vmovl_high_u8(__b); } +__ai uint32x4_t vaddl_high_u16(uint16x8_t __a, uint16x8_t __b) { + return vmovl_high_u16(__a) + vmovl_high_u16(__b); } +__ai uint64x2_t vaddl_high_u32(uint32x4_t __a, uint32x4_t __b) { + return vmovl_high_u32(__a) + vmovl_high_u32(__b); } + +__ai int16x8_t vaddw_s8(int16x8_t __a, int8x8_t __b) { + return __a + vmovl_s8(__b); } +__ai int32x4_t vaddw_s16(int32x4_t __a, int16x4_t __b) { + return __a + vmovl_s16(__b); } +__ai int64x2_t vaddw_s32(int64x2_t __a, int32x2_t __b) { + return __a + vmovl_s32(__b); } +__ai uint16x8_t vaddw_u8(uint16x8_t __a, uint8x8_t __b) { + return __a + vmovl_u8(__b); } +__ai uint32x4_t vaddw_u16(uint32x4_t __a, uint16x4_t __b) { + return __a + vmovl_u16(__b); } +__ai uint64x2_t vaddw_u32(uint64x2_t __a, uint32x2_t __b) { + return __a + vmovl_u32(__b); } + +__ai int16x8_t vaddw_high_s8(int8x16_t __a, int8x16_t __b) { + return (int16x8_t)__builtin_arm64_vaddw_high_v(__a, __b, 33); } +__ai int32x4_t 
vaddw_high_s16(int16x8_t __a, int16x8_t __b) { + return (int32x4_t)__builtin_arm64_vaddw_high_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vaddw_high_s32(int32x4_t __a, int32x4_t __b) { + return (int64x2_t)__builtin_arm64_vaddw_high_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint16x8_t vaddw_high_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint16x8_t)__builtin_arm64_vaddw_high_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vaddw_high_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint32x4_t)__builtin_arm64_vaddw_high_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vaddw_high_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint64x2_t)__builtin_arm64_vaddw_high_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai uint8x16_t vaesdq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vaesdq_v((int8x16_t)__a, (int8x16_t)__b, 48); } + +__ai uint8x16_t vaeseq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vaeseq_v((int8x16_t)__a, (int8x16_t)__b, 48); } + +__ai uint8x16_t vaesimcq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_arm64_vaesimcq_v((int8x16_t)__a, 48); } + +__ai uint8x16_t vaesmcq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_arm64_vaesmcq_v((int8x16_t)__a, 48); } + +__ai int8x8_t vand_s8(int8x8_t __a, int8x8_t __b) { + return __a & __b; } +__ai int16x4_t vand_s16(int16x4_t __a, int16x4_t __b) { + return __a & __b; } +__ai int32x2_t vand_s32(int32x2_t __a, int32x2_t __b) { + return __a & __b; } +__ai int64x1_t vand_s64(int64x1_t __a, int64x1_t __b) { + return __a & __b; } +__ai uint8x8_t vand_u8(uint8x8_t __a, uint8x8_t __b) { + return __a & __b; } +__ai uint16x4_t vand_u16(uint16x4_t __a, uint16x4_t __b) { + return __a & __b; } +__ai uint32x2_t vand_u32(uint32x2_t __a, uint32x2_t __b) { + return __a & __b; } +__ai uint64x1_t vand_u64(uint64x1_t __a, uint64x1_t __b) { + return __a & __b; } +__ai int8x16_t vandq_s8(int8x16_t __a, int8x16_t __b) { + return __a & __b; 
} +__ai int16x8_t vandq_s16(int16x8_t __a, int16x8_t __b) { + return __a & __b; } +__ai int32x4_t vandq_s32(int32x4_t __a, int32x4_t __b) { + return __a & __b; } +__ai int64x2_t vandq_s64(int64x2_t __a, int64x2_t __b) { + return __a & __b; } +__ai uint8x16_t vandq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a & __b; } +__ai uint16x8_t vandq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a & __b; } +__ai uint32x4_t vandq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a & __b; } +__ai uint64x2_t vandq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a & __b; } + +__ai int8x8_t vbic_s8(int8x8_t __a, int8x8_t __b) { + return __a & ~__b; } +__ai int16x4_t vbic_s16(int16x4_t __a, int16x4_t __b) { + return __a & ~__b; } +__ai int32x2_t vbic_s32(int32x2_t __a, int32x2_t __b) { + return __a & ~__b; } +__ai int64x1_t vbic_s64(int64x1_t __a, int64x1_t __b) { + return __a & ~__b; } +__ai uint8x8_t vbic_u8(uint8x8_t __a, uint8x8_t __b) { + return __a & ~__b; } +__ai uint16x4_t vbic_u16(uint16x4_t __a, uint16x4_t __b) { + return __a & ~__b; } +__ai uint32x2_t vbic_u32(uint32x2_t __a, uint32x2_t __b) { + return __a & ~__b; } +__ai uint64x1_t vbic_u64(uint64x1_t __a, uint64x1_t __b) { + return __a & ~__b; } +__ai int8x16_t vbicq_s8(int8x16_t __a, int8x16_t __b) { + return __a & ~__b; } +__ai int16x8_t vbicq_s16(int16x8_t __a, int16x8_t __b) { + return __a & ~__b; } +__ai int32x4_t vbicq_s32(int32x4_t __a, int32x4_t __b) { + return __a & ~__b; } +__ai int64x2_t vbicq_s64(int64x2_t __a, int64x2_t __b) { + return __a & ~__b; } +__ai uint8x16_t vbicq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a & ~__b; } +__ai uint16x8_t vbicq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a & ~__b; } +__ai uint32x4_t vbicq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a & ~__b; } +__ai uint64x2_t vbicq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a & ~__b; } + +__ai int8x8_t vbsl_s8(uint8x8_t __a, int8x8_t __b, int8x8_t __c) { + return (int8x8_t)((__a & (uint8x8_t)__b) | (~__a & 
(uint8x8_t)__c)); } +__ai int16x4_t vbsl_s16(uint16x4_t __a, int16x4_t __b, int16x4_t __c) { + return (int16x4_t)((__a & (uint16x4_t)__b) | (~__a & (uint16x4_t)__c)); } +__ai int32x2_t vbsl_s32(uint32x2_t __a, int32x2_t __b, int32x2_t __c) { + return (int32x2_t)((__a & (uint32x2_t)__b) | (~__a & (uint32x2_t)__c)); } +__ai int64x1_t vbsl_s64(uint64x1_t __a, int64x1_t __b, int64x1_t __c) { + return (int64x1_t)((__a & (uint64x1_t)__b) | (~__a & (uint64x1_t)__c)); } +__ai uint8x8_t vbsl_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return (uint8x8_t)((__a & (uint8x8_t)__b) | (~__a & (uint8x8_t)__c)); } +__ai uint16x4_t vbsl_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return (uint16x4_t)((__a & (uint16x4_t)__b) | (~__a & (uint16x4_t)__c)); } +__ai uint32x2_t vbsl_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return (uint32x2_t)((__a & (uint32x2_t)__b) | (~__a & (uint32x2_t)__c)); } +__ai uint64x1_t vbsl_u64(uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) { + return (uint64x1_t)((__a & (uint64x1_t)__b) | (~__a & (uint64x1_t)__c)); } +__ai float32x2_t vbsl_f32(uint32x2_t __a, float32x2_t __b, float32x2_t __c) { + return (float32x2_t)((__a & (uint32x2_t)__b) | (~__a & (uint32x2_t)__c)); } +__ai poly8x8_t vbsl_p8(uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) { + return (poly8x8_t)((__a & (uint8x8_t)__b) | (~__a & (uint8x8_t)__c)); } +__ai poly16x4_t vbsl_p16(uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) { + return (poly16x4_t)((__a & (uint16x4_t)__b) | (~__a & (uint16x4_t)__c)); } +__ai int8x16_t vbslq_s8(uint8x16_t __a, int8x16_t __b, int8x16_t __c) { + return (int8x16_t)((__a & (uint8x16_t)__b) | (~__a & (uint8x16_t)__c)); } +__ai int16x8_t vbslq_s16(uint16x8_t __a, int16x8_t __b, int16x8_t __c) { + return (int16x8_t)((__a & (uint16x8_t)__b) | (~__a & (uint16x8_t)__c)); } +__ai int32x4_t vbslq_s32(uint32x4_t __a, int32x4_t __b, int32x4_t __c) { + return (int32x4_t)((__a & (uint32x4_t)__b) | (~__a & (uint32x4_t)__c)); } +__ai int64x2_t 
vbslq_s64(uint64x2_t __a, int64x2_t __b, int64x2_t __c) { + return (int64x2_t)((__a & (uint64x2_t)__b) | (~__a & (uint64x2_t)__c)); } +__ai uint8x16_t vbslq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return (uint8x16_t)((__a & (uint8x16_t)__b) | (~__a & (uint8x16_t)__c)); } +__ai uint16x8_t vbslq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return (uint16x8_t)((__a & (uint16x8_t)__b) | (~__a & (uint16x8_t)__c)); } +__ai uint32x4_t vbslq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)((__a & (uint32x4_t)__b) | (~__a & (uint32x4_t)__c)); } +__ai uint64x2_t vbslq_u64(uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return (uint64x2_t)((__a & (uint64x2_t)__b) | (~__a & (uint64x2_t)__c)); } +__ai float32x4_t vbslq_f32(uint32x4_t __a, float32x4_t __b, float32x4_t __c) { + return (float32x4_t)((__a & (uint32x4_t)__b) | (~__a & (uint32x4_t)__c)); } +__ai float64x2_t vbslq_f64(uint64x2_t __a, float64x2_t __b, float64x2_t __c) { + return (float64x2_t)((__a & (uint64x2_t)__b) | (~__a & (uint64x2_t)__c)); } +__ai poly8x16_t vbslq_p8(uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) { + return (poly8x16_t)((__a & (uint8x16_t)__b) | (~__a & (uint8x16_t)__c)); } +__ai poly16x8_t vbslq_p16(uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) { + return (poly16x8_t)((__a & (uint16x8_t)__b) | (~__a & (uint16x8_t)__c)); } + +__ai uint32x2_t vcage_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vcage_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint32x4_t vcageq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vcageq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai uint32x2_t vcagt_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint32x4_t 
vcagtq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vcagtq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai uint32x2_t vcale_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vcale_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint32x4_t vcaleq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vcaleq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai uint32x2_t vcalt_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint32x4_t vcaltq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vcaltq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai uint8x8_t vceq_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a == __b); } +__ai uint16x4_t vceq_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a == __b); } +__ai uint32x2_t vceq_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a == __b); } +__ai uint32x2_t vceq_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a == __b); } +__ai uint8x8_t vceq_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a == __b); } +__ai uint16x4_t vceq_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a == __b); } +__ai uint32x2_t vceq_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a == __b); } +__ai uint8x8_t vceq_p8(poly8x8_t __a, poly8x8_t __b) { + return (uint8x8_t)(__a == __b); } +__ai uint8x16_t vceqq_s8(int8x16_t __a, int8x16_t __b) { + return 
(uint8x16_t)(__a == __b); } +__ai uint16x8_t vceqq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a == __b); } +__ai uint32x4_t vceqq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a == __b); } +__ai uint64x2_t vceqq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a == __b); } +__ai uint32x4_t vceqq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a == __b); } +__ai uint64x2_t vceqq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a == __b); } +__ai uint8x16_t vceqq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a == __b); } +__ai uint16x8_t vceqq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a == __b); } +__ai uint32x4_t vceqq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a == __b); } +__ai uint64x2_t vceqq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a == __b); } +__ai uint8x16_t vceqq_p8(poly8x16_t __a, poly8x16_t __b) { + return (uint8x16_t)(__a == __b); } + +__ai int64_t vceqd_s64(int64_t __a, int64_t __b) { + return (int64_t)(__a == __b); } +__ai uint64_t vceqd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)(__a == __b); } +__ai uint64_t vceqdq_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)(__a == __b); } +__ai float64_t vceqd_f64(float64_t __a, float64_t __b) { + return (float64_t)(__a == __b); } + +__ai float32_t vceqs_f32(float32_t __a, float32_t __b) { + return (float32_t)(__a == __b); } + +__ai int64_t vceqzd_s64(int64_t __a) { + return (int64_t)__builtin_arm64_vceqzd_s64(__a); } +__ai uint64_t vceqzd_u64(uint64_t __a) { + return (uint64_t)__builtin_arm64_vceqzd_u64(__a); } + +__ai uint8x8_t vcge_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a >= __b); } +__ai uint16x4_t vcge_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a >= __b); } +__ai uint32x2_t vcge_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a >= __b); } +__ai uint32x2_t vcge_f32(float32x2_t __a, float32x2_t __b) { + 
return (uint32x2_t)(__a >= __b); } +__ai uint8x8_t vcge_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a >= __b); } +__ai uint16x4_t vcge_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a >= __b); } +__ai uint32x2_t vcge_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a >= __b); } +__ai uint8x16_t vcgeq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a >= __b); } +__ai uint16x8_t vcgeq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a >= __b); } +__ai uint32x4_t vcgeq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a >= __b); } +__ai uint64x2_t vcgeq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a >= __b); } +__ai uint32x4_t vcgeq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a >= __b); } +__ai uint64x2_t vcgeq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a >= __b); } +__ai uint8x16_t vcgeq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a >= __b); } +__ai uint16x8_t vcgeq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a >= __b); } +__ai uint32x4_t vcgeq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a >= __b); } +__ai uint64x2_t vcgeq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a >= __b); } + +__ai int64_t vcgezd_s64(int64_t __a) { + return (int64_t)__builtin_arm64_vcgezd_s64(__a); } +__ai uint64_t vcgezd_u64(uint64_t __a) { + return (uint64_t)__builtin_arm64_vcgezd_u64(__a); } + +__ai uint8x8_t vcgt_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a > __b); } +__ai uint16x4_t vcgt_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a > __b); } +__ai uint32x2_t vcgt_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a > __b); } +__ai uint32x2_t vcgt_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a > __b); } +__ai uint8x8_t vcgt_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a > __b); } +__ai uint16x4_t vcgt_u16(uint16x4_t __a, 
uint16x4_t __b) { + return (uint16x4_t)(__a > __b); } +__ai uint32x2_t vcgt_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a > __b); } +__ai uint8x16_t vcgtq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a > __b); } +__ai uint16x8_t vcgtq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a > __b); } +__ai uint32x4_t vcgtq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a > __b); } +__ai uint64x2_t vcgtq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a > __b); } +__ai uint32x4_t vcgtq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a > __b); } +__ai uint64x2_t vcgtq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a > __b); } +__ai uint8x16_t vcgtq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a > __b); } +__ai uint16x8_t vcgtq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a > __b); } +__ai uint32x4_t vcgtq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a > __b); } +__ai uint64x2_t vcgtq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a > __b); } + +__ai int64_t vcgtd_s64(int64_t __a, int64_t __b) { + return (int64_t)(__a > __b); } +__ai uint64_t vcgtd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)(__a > __b); } + +__ai int64_t vcgtzd_s64(int64_t __a) { + return (int64_t)__builtin_arm64_vcgtzd_s64(__a); } + +__ai uint8x8_t vcle_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a <= __b); } +__ai uint16x4_t vcle_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a <= __b); } +__ai uint32x2_t vcle_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a <= __b); } +__ai uint32x2_t vcle_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a <= __b); } +__ai uint8x8_t vcle_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a <= __b); } +__ai uint16x4_t vcle_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a <= __b); } +__ai uint32x2_t vcle_u32(uint32x2_t __a, 
uint32x2_t __b) { + return (uint32x2_t)(__a <= __b); } +__ai uint8x16_t vcleq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a <= __b); } +__ai uint16x8_t vcleq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a <= __b); } +__ai uint32x4_t vcleq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a <= __b); } +__ai uint64x2_t vcleq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a <= __b); } +__ai uint32x4_t vcleq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a <= __b); } +__ai uint64x2_t vcleq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a <= __b); } +__ai uint8x16_t vcleq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a <= __b); } +__ai uint16x8_t vcleq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a <= __b); } +__ai uint32x4_t vcleq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a <= __b); } +__ai uint64x2_t vcleq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a <= __b); } + +__ai int64_t vcled_s64(int64_t __a, int64_t __b) { + return (int64_t)(__a <= __b); } +__ai uint64_t vcled_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)(__a <= __b); } + +__ai int64_t vclezd_s64(int64_t __a) { + return (int64_t)__builtin_arm64_vclezd_s64(__a); } +__ai uint64_t vclezd_u64(uint64_t __a) { + return (uint64_t)__builtin_arm64_vclezd_u64(__a); } + +__ai int8x8_t vcls_s8(int8x8_t __a) { + return (int8x8_t)__builtin_arm64_vcls_v(__a, 0); } +__ai int16x4_t vcls_s16(int16x4_t __a) { + return (int16x4_t)__builtin_arm64_vcls_v((int8x8_t)__a, 1); } +__ai int32x2_t vcls_s32(int32x2_t __a) { + return (int32x2_t)__builtin_arm64_vcls_v((int8x8_t)__a, 2); } +__ai int8x16_t vclsq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_arm64_vclsq_v(__a, 32); } +__ai int16x8_t vclsq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_arm64_vclsq_v((int8x16_t)__a, 33); } +__ai int32x4_t vclsq_s32(int32x4_t __a) { + return 
(int32x4_t)__builtin_arm64_vclsq_v((int8x16_t)__a, 34); } + +__ai uint8x8_t vclt_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a < __b); } +__ai uint16x4_t vclt_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a < __b); } +__ai uint32x2_t vclt_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a < __b); } +__ai uint32x2_t vclt_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a < __b); } +__ai uint8x8_t vclt_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a < __b); } +__ai uint16x4_t vclt_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a < __b); } +__ai uint32x2_t vclt_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a < __b); } +__ai uint8x16_t vcltq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a < __b); } +__ai uint16x8_t vcltq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a < __b); } +__ai uint32x4_t vcltq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a < __b); } +__ai uint64x2_t vcltq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a < __b); } +__ai uint32x4_t vcltq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a < __b); } +__ai uint64x2_t vcltq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a < __b); } +__ai uint8x16_t vcltq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a < __b); } +__ai uint16x8_t vcltq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a < __b); } +__ai uint32x4_t vcltq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a < __b); } +__ai uint64x2_t vcltq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a < __b); } + +__ai int64_t vcltzd_s64(int64_t __a) { + return (int64_t)__builtin_arm64_vcltzd_s64(__a); } + +__ai int8x8_t vclz_s8(int8x8_t __a) { + return (int8x8_t)__builtin_arm64_vclz_v(__a, 0); } +__ai int16x4_t vclz_s16(int16x4_t __a) { + return (int16x4_t)__builtin_arm64_vclz_v((int8x8_t)__a, 1); } +__ai int32x2_t 
vclz_s32(int32x2_t __a) { + return (int32x2_t)__builtin_arm64_vclz_v((int8x8_t)__a, 2); } /* Unsigned and 128-bit vclz forms: same builtins as the signed 64-bit forms, only the casts and the final integer literal change. */ +__ai uint8x8_t vclz_u8(uint8x8_t __a) { + return (uint8x8_t)__builtin_arm64_vclz_v((int8x8_t)__a, 16); } +__ai uint16x4_t vclz_u16(uint16x4_t __a) { + return (uint16x4_t)__builtin_arm64_vclz_v((int8x8_t)__a, 17); } +__ai uint32x2_t vclz_u32(uint32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vclz_v((int8x8_t)__a, 18); } +__ai int8x16_t vclzq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_arm64_vclzq_v(__a, 32); } +__ai int16x8_t vclzq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_arm64_vclzq_v((int8x16_t)__a, 33); } +__ai int32x4_t vclzq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_arm64_vclzq_v((int8x16_t)__a, 34); } +__ai uint8x16_t vclzq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_arm64_vclzq_v((int8x16_t)__a, 48); } +__ai uint16x8_t vclzq_u16(uint16x8_t __a) { + return (uint16x8_t)__builtin_arm64_vclzq_v((int8x16_t)__a, 49); } +__ai uint32x4_t vclzq_u32(uint32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vclzq_v((int8x16_t)__a, 50); } + /* vcnt / vcntq: defined only for 8-bit-lane vectors (u8, s8, p8); each forwards to __builtin_arm64_vcnt_v / __builtin_arm64_vcntq_v. */ +__ai uint8x8_t vcnt_u8(uint8x8_t __a) { + return (uint8x8_t)__builtin_arm64_vcnt_v((int8x8_t)__a, 16); } +__ai int8x8_t vcnt_s8(int8x8_t __a) { + return (int8x8_t)__builtin_arm64_vcnt_v(__a, 0); } +__ai poly8x8_t vcnt_p8(poly8x8_t __a) { + return (poly8x8_t)__builtin_arm64_vcnt_v((int8x8_t)__a, 4); } +__ai uint8x16_t vcntq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_arm64_vcntq_v((int8x16_t)__a, 48); } +__ai int8x16_t vcntq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_arm64_vcntq_v(__a, 32); } +__ai poly8x16_t vcntq_p8(poly8x16_t __a) { + return (poly8x16_t)__builtin_arm64_vcntq_v((int8x16_t)__a, 36); } + /* vcombine_*: build a 128-bit vector from two 64-bit vectors with __builtin_shufflevector; the index list enumerates 0 .. (result lane count - 1) in ascending order. */ +__ai int8x16_t vcombine_s8(int8x8_t __a, int8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } +__ai int16x8_t vcombine_s16(int16x4_t __a, int16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3, 4, 5, 6, 7); } +__ai 
int32x4_t vcombine_s32(int32x2_t __a, int32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3); } /* Remaining vcombine_* variants: identical shuffle pattern, only the element type and lane count differ. */ +__ai int64x2_t vcombine_s64(int64x1_t __a, int64x1_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1); } +__ai float16x8_t vcombine_f16(float16x4_t __a, float16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3, 4, 5, 6, 7); } +__ai float32x4_t vcombine_f32(float32x2_t __a, float32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3); } +__ai float64x2_t vcombine_f64(float64x1_t __a, float64x1_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1); } +__ai uint8x16_t vcombine_u8(uint8x8_t __a, uint8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } +__ai uint16x8_t vcombine_u16(uint16x4_t __a, uint16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3, 4, 5, 6, 7); } +__ai uint32x4_t vcombine_u32(uint32x2_t __a, uint32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3); } +__ai uint64x2_t vcombine_u64(uint64x1_t __a, uint64x1_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1); } +__ai poly8x16_t vcombine_p8(poly8x8_t __a, poly8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } +__ai poly16x8_t vcombine_p16(poly16x4_t __a, poly16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 1, 2, 3, 4, 5, 6, 7); } + /* vcopyq_lane_*: statement-expression macros. The vector arguments a and c are each evaluated once into typed locals (__a, __c); __b and __d are pasted directly into the __builtin_arm64_vcopyq_lane_v call (presumably because they must be constant expressions - confirm). Both vector operands are 128-bit types. */ +#define vcopyq_lane_s8(a, __b, c, __d) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __c = (c); \ + (int8x16_t)__builtin_arm64_vcopyq_lane_v(__a, __b, __c, __d, 32); }) +#define vcopyq_lane_s16(a, __b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __c = (c); \ + (int16x8_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, __d, 33); }) +#define vcopyq_lane_s32(a, __b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __c = (c); \ + (int32x4_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, 
__d, 34); }) /* Remaining vcopyq_lane_* macros: same shape as above; the operands are cast to int8x16_t and only the final integer literal changes with the element type. */ +#define vcopyq_lane_s64(a, __b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __c = (c); \ + (int64x2_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, __d, 35); }) +#define vcopyq_lane_f32(a, __b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __c = (c); \ + (float32x4_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, __d, 39); }) +#define vcopyq_lane_f64(a, __b, c, __d) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __c = (c); \ + (float64x2_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, __d, 40); }) +#define vcopyq_lane_u8(a, __b, c, __d) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __c = (c); \ + (uint8x16_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, __d, 48); }) +#define vcopyq_lane_u16(a, __b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __c = (c); \ + (uint16x8_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, __d, 49); }) +#define vcopyq_lane_u32(a, __b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __c = (c); \ + (uint32x4_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, __d, 50); }) +#define vcopyq_lane_u64(a, __b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __c = (c); \ + (uint64x2_t)__builtin_arm64_vcopyq_lane_v((int8x16_t)__a, __b, (int8x16_t)__c, __d, 51); }) + /* vcreate_*: take a uint64_t and return it cast to the 64-bit vector type named in the return type; no builtin is involved. */ +__ai int8x8_t vcreate_s8(uint64_t __a) { + return (int8x8_t)__a; } +__ai int16x4_t vcreate_s16(uint64_t __a) { + return (int16x4_t)__a; } +__ai int32x2_t vcreate_s32(uint64_t __a) { + return (int32x2_t)__a; } +__ai float16x4_t vcreate_f16(uint64_t __a) { + return (float16x4_t)__a; } +__ai float32x2_t vcreate_f32(uint64_t __a) { + return (float32x2_t)__a; } +__ai uint8x8_t vcreate_u8(uint64_t __a) { + return (uint8x8_t)__a; } +__ai uint16x4_t vcreate_u16(uint64_t __a) { + return (uint16x4_t)__a; } +__ai uint32x2_t vcreate_u32(uint64_t __a) { + return 
(uint32x2_t)__a; } +__ai uint64x1_t vcreate_u64(uint64_t __a) { + return (uint64x1_t)__a; } +__ai poly8x8_t vcreate_p8(uint64_t __a) { + return (poly8x8_t)__a; } +__ai poly16x4_t vcreate_p16(uint64_t __a) { + return (poly16x4_t)__a; } +__ai int64x1_t vcreate_s64(uint64_t __a) { + return (int64x1_t)__a; } + /* vcvta*: thin forwarders to the __builtin_arm64_vcvta* builtins. Scalar forms (vcvtad_*, vcvtas_*) pass the float through unchanged; vector forms cast the operand to int8x8_t / int8x16_t and append an integer literal that varies with the result element type. */ +__ai int64_t vcvtad_s64_f64(float64_t __a) { + return (int64_t)__builtin_arm64_vcvtad_s64_f64(__a); } + +__ai uint64_t vcvtad_u64_f64(float64_t __a) { + return (uint64_t)__builtin_arm64_vcvtad_u64_f64(__a); } + +__ai int32_t vcvtas_s32_f32(float32_t __a) { + return (int32_t)__builtin_arm64_vcvtas_s32_f32(__a); } + +__ai uint32_t vcvtas_u32_f32(float32_t __a) { + return (uint32_t)__builtin_arm64_vcvtas_u32_f32(__a); } + +__ai int32x2_t vcvta_s32_f32(float32x2_t __a) { + return (int32x2_t)__builtin_arm64_vcvta_s32_v((int8x8_t)__a, 2); } +__ai int32x4_t vcvtaq_s32_f32(float32x4_t __a) { + return (int32x4_t)__builtin_arm64_vcvtaq_s32_v((int8x16_t)__a, 34); } + +__ai int64x2_t vcvtaq_s64_f64(float64x2_t __a) { + return (int64x2_t)__builtin_arm64_vcvtaq_s64_v((int8x16_t)__a, 35); } + +__ai uint32x2_t vcvta_u32_f32(float32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vcvta_u32_v((int8x8_t)__a, 18); } +__ai uint32x4_t vcvtaq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vcvtaq_u32_v((int8x16_t)__a, 50); } + +__ai uint64x2_t vcvtaq_u64_f64(float64x2_t __a) { + return (uint64x2_t)__builtin_arm64_vcvtaq_u64_v((int8x16_t)__a, 51); } + /* vcvtd_f64_*: scalar 64-bit integer to float64_t conversions via the same-named builtins. */ +__ai float64_t vcvtd_f64_s64(int64_t __a) { + return (float64_t)__builtin_arm64_vcvtd_f64_s64(__a); } +__ai float64_t vcvtd_f64_u64(uint64_t __a) { + return (float64_t)__builtin_arm64_vcvtd_f64_u64(__a); } + /* vcvtd_n_*: macro forms taking an extra argument __b that is passed to the builtin as written (not copied into a local); the value argument a is evaluated once into __a. */ +#define vcvtd_n_f64_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (float64_t)__builtin_arm64_vcvtd_n_f64_s64(__a, __b); }) +#define vcvtd_n_f64_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (float64_t)__builtin_arm64_vcvtd_n_f64_u64(__a, __b); }) + +#define vcvtd_n_s64_f64(a, __b) __extension__ 
({ \ + float64_t __a = (a); \ + (int64_t)__builtin_arm64_vcvtd_n_s64_f64(__a, __b); }) + +#define vcvtd_n_u64_f64(a, __b) __extension__ ({ \ + float64_t __a = (a); \ + (uint64_t)__builtin_arm64_vcvtd_n_u64_f64(__a, __b); }) + /* vcvtd_{s64,u64}_f64: scalar float64_t to 64-bit integer conversions via the same-named builtins. */ +__ai int64_t vcvtd_s64_f64(float64_t __a) { + return (int64_t)__builtin_arm64_vcvtd_s64_f64(__a); } + +__ai uint64_t vcvtd_u64_f64(float64_t __a) { + return (uint64_t)__builtin_arm64_vcvtd_u64_f64(__a); } + /* vcvtm*: forwarders to the __builtin_arm64_vcvtm* builtins. Scalar forms pass the float through; vector forms cast the operand to int8x8_t / int8x16_t and append a per-type integer literal. */ +__ai int64_t vcvtmd_s64_f64(float64_t __a) { + return (int64_t)__builtin_arm64_vcvtmd_s64_f64(__a); } + +__ai uint64_t vcvtmd_u64_f64(float64_t __a) { + return (uint64_t)__builtin_arm64_vcvtmd_u64_f64(__a); } + +__ai int32_t vcvtms_s32_f32(float32_t __a) { + return (int32_t)__builtin_arm64_vcvtms_s32_f32(__a); } + +__ai uint32_t vcvtms_u32_f32(float32_t __a) { + return (uint32_t)__builtin_arm64_vcvtms_u32_f32(__a); } + +__ai int32x2_t vcvtm_s32_f32(float32x2_t __a) { + return (int32x2_t)__builtin_arm64_vcvtm_s32_v((int8x8_t)__a, 2); } +__ai int32x4_t vcvtmq_s32_f32(float32x4_t __a) { + return (int32x4_t)__builtin_arm64_vcvtmq_s32_v((int8x16_t)__a, 34); } + +__ai int64x2_t vcvtmq_s64_f64(float64x2_t __a) { + return (int64x2_t)__builtin_arm64_vcvtmq_s64_v((int8x16_t)__a, 35); } + +__ai uint32x2_t vcvtm_u32_f32(float32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vcvtm_u32_v((int8x8_t)__a, 18); } +__ai uint32x4_t vcvtmq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vcvtmq_u32_v((int8x16_t)__a, 50); } + +__ai uint64x2_t vcvtmq_u64_f64(float64x2_t __a) { + return (uint64x2_t)__builtin_arm64_vcvtmq_u64_v((int8x16_t)__a, 51); } + /* vcvtn*: scalar forwarders to the __builtin_arm64_vcvtn* builtins. */ +__ai int64_t vcvtnd_s64_f64(float64_t __a) { + return (int64_t)__builtin_arm64_vcvtnd_s64_f64(__a); } + +__ai uint64_t vcvtnd_u64_f64(float64_t __a) { + return (uint64_t)__builtin_arm64_vcvtnd_u64_f64(__a); } + +__ai int32_t vcvtns_s32_f32(float32_t __a) { + return (int32_t)__builtin_arm64_vcvtns_s32_f32(__a); } + +__ai uint32_t vcvtns_u32_f32(float32_t __a) { + return 
(uint32_t)__builtin_arm64_vcvtns_u32_f32(__a); } + /* Vector vcvtn* forms: the float vector is cast to int8x8_t / int8x16_t and passed to __builtin_arm64_vcvtn{,q}_{s32,s64,u32,u64}_v together with a per-type integer literal. */ +__ai int32x2_t vcvtn_s32_f32(float32x2_t __a) { + return (int32x2_t)__builtin_arm64_vcvtn_s32_v((int8x8_t)__a, 2); } +__ai int32x4_t vcvtnq_s32_f32(float32x4_t __a) { + return (int32x4_t)__builtin_arm64_vcvtnq_s32_v((int8x16_t)__a, 34); } + +__ai int64x2_t vcvtnq_s64_f64(float64x2_t __a) { + return (int64x2_t)__builtin_arm64_vcvtnq_s64_v((int8x16_t)__a, 35); } + +__ai uint32x2_t vcvtn_u32_f32(float32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vcvtn_u32_v((int8x8_t)__a, 18); } +__ai uint32x4_t vcvtnq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vcvtnq_u32_v((int8x16_t)__a, 50); } + +__ai uint64x2_t vcvtnq_u64_f64(float64x2_t __a) { + return (uint64x2_t)__builtin_arm64_vcvtnq_u64_v((int8x16_t)__a, 51); } + /* vcvtp*: forwarders to the __builtin_arm64_vcvtp* builtins, laid out exactly like the vcvtn* family above (scalar forms first, then vector forms). */ +__ai int64_t vcvtpd_s64_f64(float64_t __a) { + return (int64_t)__builtin_arm64_vcvtpd_s64_f64(__a); } + +__ai uint64_t vcvtpd_u64_f64(float64_t __a) { + return (uint64_t)__builtin_arm64_vcvtpd_u64_f64(__a); } + +__ai int32_t vcvtps_s32_f32(float32_t __a) { + return (int32_t)__builtin_arm64_vcvtps_s32_f32(__a); } + +__ai uint32_t vcvtps_u32_f32(float32_t __a) { + return (uint32_t)__builtin_arm64_vcvtps_u32_f32(__a); } + +__ai int32x2_t vcvtp_s32_f32(float32x2_t __a) { + return (int32x2_t)__builtin_arm64_vcvtp_s32_v((int8x8_t)__a, 2); } +__ai int32x4_t vcvtpq_s32_f32(float32x4_t __a) { + return (int32x4_t)__builtin_arm64_vcvtpq_s32_v((int8x16_t)__a, 34); } + +__ai int64x2_t vcvtpq_s64_f64(float64x2_t __a) { + return (int64x2_t)__builtin_arm64_vcvtpq_s64_v((int8x16_t)__a, 35); } + +__ai uint32x2_t vcvtp_u32_f32(float32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vcvtp_u32_v((int8x8_t)__a, 18); } +__ai uint32x4_t vcvtpq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vcvtpq_u32_v((int8x16_t)__a, 50); } + +__ai uint64x2_t vcvtpq_u64_f64(float64x2_t __a) { + return (uint64x2_t)__builtin_arm64_vcvtpq_u64_v((int8x16_t)__a, 51); } + /* vcvts_f32_*: scalar 32-bit integer to float32_t conversions via the same-named builtins. */ +__ai float32_t vcvts_f32_s32(int32_t __a) { + 
return (float32_t)__builtin_arm64_vcvts_f32_s32(__a); } +__ai float32_t vcvts_f32_u32(uint32_t __a) { + return (float32_t)__builtin_arm64_vcvts_f32_u32(__a); } + /* vcvts_n_*: 32-bit scalar macro forms; a is evaluated once into a typed local and __b is passed to the builtin as written. */ +#define vcvts_n_f32_s32(a, __b) __extension__ ({ \ + int32_t __a = (a); \ + (float32_t)__builtin_arm64_vcvts_n_f32_s32(__a, __b); }) +#define vcvts_n_f32_u32(a, __b) __extension__ ({ \ + uint32_t __a = (a); \ + (float32_t)__builtin_arm64_vcvts_n_f32_u32(__a, __b); }) + +#define vcvts_n_s32_f32(a, __b) __extension__ ({ \ + float32_t __a = (a); \ + (int32_t)__builtin_arm64_vcvts_n_s32_f32(__a, __b); }) + +#define vcvts_n_u32_f32(a, __b) __extension__ ({ \ + float32_t __a = (a); \ + (uint32_t)__builtin_arm64_vcvts_n_u32_f32(__a, __b); }) + +__ai int32_t vcvts_s32_f32(float32_t __a) { + return (int32_t)__builtin_arm64_vcvts_s32_f32(__a); } + +__ai uint32_t vcvts_u32_f32(float32_t __a) { + return (uint32_t)__builtin_arm64_vcvts_u32_f32(__a); } + /* vcvtx*: float64 to float32 conversions routed through the __builtin_arm64_vcvtx* builtins; the _high form takes an existing float32x2_t plus a float64x2_t and returns a float32x4_t. */ +__ai float32_t vcvtxd_f32_f64(float64_t __a) { + return (float32_t)__builtin_arm64_vcvtxd_f32_f64(__a); } + +__ai float32x2_t vcvtx_f32_f64(float64x2_t __a) { + return (float32x2_t)__builtin_arm64_vcvtx_f32_f64((int8x16_t)__a, 7); } + +__ai float32x4_t vcvtx_high_f32_f64(float32x2_t __a, float64x2_t __b) { + return (float32x4_t)__builtin_arm64_vcvtx_high_f32_f64((int8x8_t)__a, (int8x16_t)__b, 39); } + /* vcvtz*: forwarders to the __builtin_arm64_vcvtz* builtins, laid out like the other vcvt{a,m,n,p}* families (scalar forms first, then vector forms). */ +__ai int64_t vcvtzd_s64_f64(float64_t __a) { + return (int64_t)__builtin_arm64_vcvtzd_s64_f64(__a); } + +__ai uint64_t vcvtzd_u64_f64(float64_t __a) { + return (uint64_t)__builtin_arm64_vcvtzd_u64_f64(__a); } + +__ai int32_t vcvtzs_s32_f32(float32_t __a) { + return (int32_t)__builtin_arm64_vcvtzs_s32_f32(__a); } + +__ai uint32_t vcvtzs_u32_f32(float32_t __a) { + return (uint32_t)__builtin_arm64_vcvtzs_u32_f32(__a); } + +__ai int32x2_t vcvtz_s32_f32(float32x2_t __a) { + return (int32x2_t)__builtin_arm64_vcvtz_s32_v((int8x8_t)__a, 2); } +__ai int32x4_t vcvtzq_s32_f32(float32x4_t __a) { + return (int32x4_t)__builtin_arm64_vcvtzq_s32_v((int8x16_t)__a, 34); } + +__ai 
int64x2_t vcvtzq_s64_f64(float64x2_t __a) { + return (int64x2_t)__builtin_arm64_vcvtzq_s64_v((int8x16_t)__a, 35); } + +__ai uint32x2_t vcvtz_u32_f32(float32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vcvtz_u32_v((int8x8_t)__a, 18); } +__ai uint32x4_t vcvtzq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vcvtzq_u32_v((int8x16_t)__a, 50); } + +__ai uint64x2_t vcvtzq_u64_f64(float64x2_t __a) { + return (uint64x2_t)__builtin_arm64_vcvtzq_u64_v((int8x16_t)__a, 51); } + /* vcvt_f16_f32: takes a float32x4_t and returns a float16x4_t via __builtin_arm64_vcvt_f16_v. */ +__ai float16x4_t vcvt_f16_f32(float32x4_t __a) { + return (float16x4_t)__builtin_arm64_vcvt_f16_v((int8x16_t)__a, 6); } + /* vcvt{,q}_f32_{s32,u32}: integer vector to float32 vector of the same lane count; signed and unsigned sources share one builtin and differ only in the final integer literal. */ +__ai float32x2_t vcvt_f32_s32(int32x2_t __a) { + return (float32x2_t)__builtin_arm64_vcvt_f32_v((int8x8_t)__a, 2); } +__ai float32x2_t vcvt_f32_u32(uint32x2_t __a) { + return (float32x2_t)__builtin_arm64_vcvt_f32_v((int8x8_t)__a, 18); } +__ai float32x4_t vcvtq_f32_s32(int32x4_t __a) { + return (float32x4_t)__builtin_arm64_vcvtq_f32_v((int8x16_t)__a, 34); } +__ai float32x4_t vcvtq_f32_u32(uint32x4_t __a) { + return (float32x4_t)__builtin_arm64_vcvtq_f32_v((int8x16_t)__a, 50); } + /* Float-to-float width changes (f16 -> f32, f64 -> f32, f32 -> f64) each use a dedicated builtin; the vector width changes with the element width, as the parameter and return types show. */ +__ai float32x4_t vcvt_f32_f16(float16x4_t __a) { + return (float32x4_t)__builtin_arm64_vcvt_f32_f16((int8x8_t)__a, 6); } + +__ai float32x2_t vcvt_f32_f64(float64x2_t __a) { + return (float32x2_t)__builtin_arm64_vcvt_f32_f64((int8x16_t)__a, 7); } + +__ai float64x2_t vcvtq_f64_s64(int64x2_t __a) { + return (float64x2_t)__builtin_arm64_vcvtq_f64_v((int8x16_t)__a, 35); } +__ai float64x2_t vcvtq_f64_u64(uint64x2_t __a) { + return (float64x2_t)__builtin_arm64_vcvtq_f64_v((int8x16_t)__a, 51); } + +__ai float64x2_t vcvt_f64_f32(float32x2_t __a) { + return (float64x2_t)__builtin_arm64_vcvt_f64_f32((int8x8_t)__a, 40); } + /* vcvt_high_*: the f32_f64 form takes a float32x2_t plus a float64x2_t and returns a float32x4_t; the f64_f32 form takes a float32x4_t and returns a float64x2_t. */ +__ai float32x4_t vcvt_high_f32_f64(float32x2_t __a, float64x2_t __b) { + return (float32x4_t)__builtin_arm64_vcvt_high_f32_f64((int8x8_t)__a, (int8x16_t)__b, 39); } + +__ai float64x2_t vcvt_high_f64_f32(float32x4_t __a) { + return 
(float64x2_t)__builtin_arm64_vcvt_high_f64_f32((int8x16_t)__a, 40); } + /* vcvt{,q}_n_f32_* / vcvtq_n_f64_*: integer vector to float vector macros. a is evaluated once into a typed local; __b is passed to the builtin as written, ahead of the per-type integer literal. */ +#define vcvt_n_f32_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (float32x2_t)__builtin_arm64_vcvt_n_f32_v((int8x8_t)__a, __b, 2); }) +#define vcvt_n_f32_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (float32x2_t)__builtin_arm64_vcvt_n_f32_v((int8x8_t)__a, __b, 18); }) +#define vcvtq_n_f32_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (float32x4_t)__builtin_arm64_vcvtq_n_f32_v((int8x16_t)__a, __b, 34); }) +#define vcvtq_n_f32_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (float32x4_t)__builtin_arm64_vcvtq_n_f32_v((int8x16_t)__a, __b, 50); }) + +#define vcvtq_n_f64_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (float64x2_t)__builtin_arm64_vcvtq_n_f64_v((int8x16_t)__a, __b, 35); }) +#define vcvtq_n_f64_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (float64x2_t)__builtin_arm64_vcvtq_n_f64_v((int8x16_t)__a, __b, 51); }) + /* vcvt{,q}_n_{s32,s64,u32,u64}_*: the reverse direction (float vector to integer vector), same macro shape. */ +#define vcvt_n_s32_f32(a, __b) __extension__ ({ \ + float32x2_t __a = (a); \ + (int32x2_t)__builtin_arm64_vcvt_n_s32_v((int8x8_t)__a, __b, 2); }) +#define vcvtq_n_s32_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + (int32x4_t)__builtin_arm64_vcvtq_n_s32_v((int8x16_t)__a, __b, 34); }) + +#define vcvtq_n_s64_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + (int64x2_t)__builtin_arm64_vcvtq_n_s64_v((int8x16_t)__a, __b, 35); }) + +#define vcvt_n_u32_f32(a, __b) __extension__ ({ \ + float32x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vcvt_n_u32_v((int8x8_t)__a, __b, 18); }) +#define vcvtq_n_u32_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + (uint32x4_t)__builtin_arm64_vcvtq_n_u32_v((int8x16_t)__a, __b, 50); }) + +#define vcvtq_n_u64_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + (uint64x2_t)__builtin_arm64_vcvtq_n_u64_v((int8x16_t)__a, __b, 51); }) + /* vcvt{,q}_{s32,s64,u32,u64}_*: float vector to integer vector through the __builtin_arm64_vcvt{,q}_*_v builtins. */ +__ai int32x2_t vcvt_s32_f32(float32x2_t __a) { + return (int32x2_t)__builtin_arm64_vcvt_s32_v((int8x8_t)__a, 
2); } +__ai int32x4_t vcvtq_s32_f32(float32x4_t __a) { + return (int32x4_t)__builtin_arm64_vcvtq_s32_v((int8x16_t)__a, 34); } + +__ai int64x2_t vcvtq_s64_f64(float64x2_t __a) { + return (int64x2_t)__builtin_arm64_vcvtq_s64_v((int8x16_t)__a, 35); } + +__ai uint32x2_t vcvt_u32_f32(float32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vcvt_u32_v((int8x8_t)__a, 18); } +__ai uint32x4_t vcvtq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vcvtq_u32_v((int8x16_t)__a, 50); } + +__ai uint64x2_t vcvtq_u64_f64(float64x2_t __a) { + return (uint64x2_t)__builtin_arm64_vcvtq_u64_v((int8x16_t)__a, 51); } + /* vdiv / vdivq: both float operands are cast to int8x8_t / int8x16_t and passed to __builtin_arm64_vdiv_v / __builtin_arm64_vdivq_v with a per-type integer literal. */ +__ai float32x2_t vdiv_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vdiv_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vdivq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vdivq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vdivq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vdivq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + /* vdup{b,d,h,s}_lane_*: macros returning a scalar from a vector. Note the source operand is declared with a 128-bit vector type in every variant here; unsigned variants cast it to the signed vector type before calling the builtin. __b is passed as written. */ +#define vdupb_lane_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8_t)__builtin_arm64_vdupb_lane_s8(__a, __b); }) +#define vdupb_lane_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8_t)__builtin_arm64_vdupb_lane_u8((int8x16_t)__a, __b); }) + +#define vdupd_lane_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64_t)__builtin_arm64_vdupd_lane_s64(__a, __b); }) +#define vdupd_lane_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64_t)__builtin_arm64_vdupd_lane_u64((int64x2_t)__a, __b); }) +#define vdupd_lane_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + (float64_t)__builtin_arm64_vdupd_lane_f64(__a, __b); }) + +#define vduph_lane_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16_t)__builtin_arm64_vduph_lane_s16(__a, __b); }) +#define vduph_lane_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + 
(uint16_t)__builtin_arm64_vduph_lane_u16((int16x8_t)__a, __b); }) + /* vdups_lane_*: 32-bit scalar from a four-lane vector via the same-named builtins; the unsigned variant casts to int32x4_t first. */ +#define vdups_lane_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32_t)__builtin_arm64_vdups_lane_s32(__a, __b); }) +#define vdups_lane_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32_t)__builtin_arm64_vdups_lane_u32((int32x4_t)__a, __b); }) +#define vdups_lane_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + (float32_t)__builtin_arm64_vdups_lane_f32(__a, __b); }) + /* vdup_lane_*: 64-bit vector in, 64-bit vector out. Implemented with __builtin_shufflevector(__a, __a, ...) where the index __b is repeated once per result lane, so __b is expanded textually several times while a is evaluated only once. */ +#define vdup_lane_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_lane_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_lane_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdup_lane_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b); }) +#define vdup_lane_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_lane_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_lane_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdup_lane_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b); }) +#define vdup_lane_p8(a, __b) __extension__ ({ \ + poly8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_lane_p16(a, __b) __extension__ ({ \ + poly16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_lane_f32(a, __b) __extension__ ({ \ + float32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) /* vdupq_lane_*: same shuffle idiom, but the 64-bit source vector is expanded to a result with twice as many lanes (index repeated 2x as often). */ +#define vdupq_lane_u8(a, 
__b) __extension__ ({ \ + uint8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) /* Remaining vdupq_lane_* macros: the source is a 64-bit vector type and the shuffle index list has twice as many entries as the source has lanes. */ +#define vdupq_lane_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdupq_lane_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdupq_lane_p8(a, __b) __extension__ ({ \ + poly8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_p16(a, __b) __extension__ ({ \ + poly16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_f32(a, __b) __extension__ ({ \ + float32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdupq_lane_f64(a, __b) __extension__ ({ \ + float64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdupq_lane_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdupq_lane_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) + /* vdup_lane_f64: unlike the other vdup_lane_* macros this one takes a float64x2_t, calls a builtin instead of shuffling, and yields a scalar float64_t. NOTE(review): the asymmetry is in the source as written - confirm against the toolchain before relying on it. */ +#define vdup_lane_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + (float64_t)__builtin_arm64_vdup_lane_f64(__a, __b); }) + /* vdup_n_* / vdupq_n_*: build a vector whose every lane is the scalar argument, using a vector compound literal. */ +__ai uint8x8_t vdup_n_u8(uint8_t __a) { 
+ return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } /* vdup_n_*: 64-bit results; the initializer lists __a once per lane of the return type. */ +__ai uint16x4_t vdup_n_u16(uint16_t __a) { + return (uint16x4_t){ __a, __a, __a, __a }; } +__ai uint32x2_t vdup_n_u32(uint32_t __a) { + return (uint32x2_t){ __a, __a }; } +__ai int8x8_t vdup_n_s8(int8_t __a) { + return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int16x4_t vdup_n_s16(int16_t __a) { + return (int16x4_t){ __a, __a, __a, __a }; } +__ai int32x2_t vdup_n_s32(int32_t __a) { + return (int32x2_t){ __a, __a }; } +__ai float64x1_t vdup_n_f64(float64_t __a) { + return (float64x1_t){ __a }; } +__ai poly8x8_t vdup_n_p8(poly8_t __a) { + return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly16x4_t vdup_n_p16(poly16_t __a) { + return (poly16x4_t){ __a, __a, __a, __a }; } +__ai float16x4_t vdup_n_f16(float16_t __a) { + return (float16x4_t){ __a, __a, __a, __a }; } +__ai float32x2_t vdup_n_f32(float32_t __a) { + return (float32x2_t){ __a, __a }; } /* vdupq_n_*: 128-bit results; same idiom with twice as many initializer entries. */ +__ai uint8x16_t vdupq_n_u8(uint8_t __a) { + return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint16x8_t vdupq_n_u16(uint16_t __a) { + return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint32x4_t vdupq_n_u32(uint32_t __a) { + return (uint32x4_t){ __a, __a, __a, __a }; } +__ai int8x16_t vdupq_n_s8(int8_t __a) { + return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int16x8_t vdupq_n_s16(int16_t __a) { + return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int32x4_t vdupq_n_s32(int32_t __a) { + return (int32x4_t){ __a, __a, __a, __a }; } +__ai poly8x16_t vdupq_n_p8(poly8_t __a) { + return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly16x8_t vdupq_n_p16(poly16_t __a) { + return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai float16x8_t vdupq_n_f16(float16_t __a) { + return (float16x8_t){ __a, 
__a, __a, __a, __a, __a, __a, __a }; } +__ai float32x4_t vdupq_n_f32(float32_t __a) { + return (float32x4_t){ __a, __a, __a, __a }; } +__ai float64x2_t vdupq_n_f64(float64_t __a) { + return (float64x2_t){ __a, __a }; } +__ai int64x1_t vdup_n_s64(int64_t __a) { + return (int64x1_t){ __a }; } +__ai uint64x1_t vdup_n_u64(uint64_t __a) { + return (uint64x1_t){ __a }; } +__ai int64x2_t vdupq_n_s64(int64_t __a) { + return (int64x2_t){ __a, __a }; } +__ai uint64x2_t vdupq_n_u64(uint64_t __a) { + return (uint64x2_t){ __a, __a }; } + /* veor / veorq: apply the C `^` operator directly to the two integer vector operands; operands and result share one vector type, so no cast is needed. Only integer element types are provided. */ +__ai int8x8_t veor_s8(int8x8_t __a, int8x8_t __b) { + return __a ^ __b; } +__ai int16x4_t veor_s16(int16x4_t __a, int16x4_t __b) { + return __a ^ __b; } +__ai int32x2_t veor_s32(int32x2_t __a, int32x2_t __b) { + return __a ^ __b; } +__ai int64x1_t veor_s64(int64x1_t __a, int64x1_t __b) { + return __a ^ __b; } +__ai uint8x8_t veor_u8(uint8x8_t __a, uint8x8_t __b) { + return __a ^ __b; } +__ai uint16x4_t veor_u16(uint16x4_t __a, uint16x4_t __b) { + return __a ^ __b; } +__ai uint32x2_t veor_u32(uint32x2_t __a, uint32x2_t __b) { + return __a ^ __b; } +__ai uint64x1_t veor_u64(uint64x1_t __a, uint64x1_t __b) { + return __a ^ __b; } +__ai int8x16_t veorq_s8(int8x16_t __a, int8x16_t __b) { + return __a ^ __b; } +__ai int16x8_t veorq_s16(int16x8_t __a, int16x8_t __b) { + return __a ^ __b; } +__ai int32x4_t veorq_s32(int32x4_t __a, int32x4_t __b) { + return __a ^ __b; } +__ai int64x2_t veorq_s64(int64x2_t __a, int64x2_t __b) { + return __a ^ __b; } +__ai uint8x16_t veorq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a ^ __b; } +__ai uint16x8_t veorq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a ^ __b; } +__ai uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a ^ __b; } +__ai uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a ^ __b; } + /* vext_*: macros over two 64-bit vectors. a and b are evaluated once into typed locals; __c is passed to __builtin_arm64_vext_v as written, followed by the per-type integer literal. */ +#define vext_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_arm64_vext_v(__a, __b, __c, 0); }) +#define vext_u8(a, b, __c) 
__extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) /* Remaining vext_* macros: every non-s8 variant casts both operands to int8x8_t before the builtin call and casts the result back to its own vector type. */ +#define vext_p8(a, b, __c) __extension__ ({ \ + poly8x8_t __a = (a); poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) +#define vext_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vext_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vext_p16(a, b, __c) __extension__ ({ \ + poly16x4_t __a = (a); poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) +#define vext_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vext_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vext_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vext_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vext_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); \ + (float32x2_t)__builtin_arm64_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 7); }) /* vextq_*: 128-bit counterparts built on __builtin_arm64_vextq_v with int8x16_t casts. */ +#define vextq_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_arm64_vextq_v(__a, __b, __c, 32); }) +#define vextq_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + 
(uint8x16_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) /* Remaining vextq_* macros: identical shape; only the vector type and the final integer literal change. */ +#define vextq_p8(a, b, __c) __extension__ ({ \ + poly8x16_t __a = (a); poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) +#define vextq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vextq_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vextq_p16(a, b, __c) __extension__ ({ \ + poly16x8_t __a = (a); poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) +#define vextq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vextq_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vextq_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vextq_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) +#define vextq_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); \ + (float32x4_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 39); }) +#define vextq_f64(a, b, __c) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); \ + (float64x2_t)__builtin_arm64_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 40); }) + /* vfma / vfmaq: three float vector operands, each cast to int8x8_t / int8x16_t and passed in order (__a, __b, __c) to __builtin_arm64_vfma_v / __builtin_arm64_vfmaq_v. */ +__ai float32x2_t vfma_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { + return 
(float32x2_t)__builtin_arm64_vfma_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 7); } +__ai float32x4_t vfmaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { + return (float32x4_t)__builtin_arm64_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 39); } +__ai float64x2_t vfmaq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) { + return (float64x2_t)__builtin_arm64_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 40); } + /* vfma{,q}_lane_*: macro forms with a fourth argument __d that is passed to the *_lane_v builtin as written. All three vector arguments, including c, are declared with the same vector type as the result. */ +#define vfma_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ + (float32x2_t)__builtin_arm64_vfma_lane_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, __d, 7); }) +#define vfmaq_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x4_t __c = (c); \ + (float32x4_t)__builtin_arm64_vfmaq_lane_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, __d, 39); }) +#define vfmaq_lane_f64(a, b, c, __d) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); float64x2_t __c = (c); \ + (float64x2_t)__builtin_arm64_vfmaq_lane_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, __d, 40); }) + /* vfma{,q}_n_*: the scalar __c is first replicated into every lane of a vector compound literal, then the ordinary vfma / vfmaq builtin is called with that vector as the third operand. */ +__ai float32x2_t vfma_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { + return (float32x2_t)__builtin_arm64_vfma_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)(float32x2_t){ __c, __c }, 7); } +__ai float32x4_t vfmaq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { + return (float32x4_t)__builtin_arm64_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)(float32x4_t){ __c, __c, __c, __c }, 39); } +__ai float64x2_t vfmaq_n_f64(float64x2_t __a, float64x2_t __b, float64_t __c) { + return (float64x2_t)__builtin_arm64_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)(float64x2_t){ __c, __c }, 40); } + /* vfms / vfmsq: same calling shape as vfma / vfmaq but routed to the __builtin_arm64_vfms_v / __builtin_arm64_vfmsq_v builtins. */ +__ai float32x2_t vfms_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { + return (float32x2_t)__builtin_arm64_vfms_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 7); } +__ai float32x4_t vfmsq_f32(float32x4_t __a, 
float32x4_t __b, float32x4_t __c) { + return (float32x4_t)__builtin_arm64_vfmsq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 39); } +__ai float64x2_t vfmsq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) { + return (float64x2_t)__builtin_arm64_vfmsq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 40); } + +#define vfms_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ + (float32x2_t)__builtin_arm64_vfms_lane_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, __d, 7); }) +#define vfmsq_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x4_t __c = (c); \ + (float32x4_t)__builtin_arm64_vfmsq_lane_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, __d, 39); }) + +#define vget_lane_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8_t)__builtin_arm64_vget_lane_s8(__a, __b); }) +#define vget_lane_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16_t)__builtin_arm64_vget_lane_s16(__a, __b); }) +#define vget_lane_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32_t)__builtin_arm64_vget_lane_s32(__a, __b); }) +#define vget_lane_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64_t)__builtin_arm64_vget_lane_s64(__a, __b); }) +#define vget_lane_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8_t)__builtin_arm64_vget_lane_u8((int8x8_t)__a, __b); }) +#define vget_lane_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16_t)__builtin_arm64_vget_lane_u16((int16x4_t)__a, __b); }) +#define vget_lane_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32_t)__builtin_arm64_vget_lane_u32((int32x2_t)__a, __b); }) +#define vget_lane_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64_t)__builtin_arm64_vget_lane_u64((int64x1_t)__a, __b); }) +#define vget_lane_f32(a, __b) __extension__ ({ \ + float32x2_t __a = (a); \ + (float32_t)__builtin_arm64_vget_lane_f32(__a, 
__b); }) +#define vget_lane_f64(a, __b) __extension__ ({ \ + float64x1_t __a = (a); \ + (float64_t)__builtin_arm64_vget_lane_f64(__a, __b); }) +#define vget_lane_p8(a, __b) __extension__ ({ \ + poly8x8_t __a = (a); \ + (poly8_t)__builtin_arm64_vget_lane_p8((int8x8_t)__a, __b); }) +#define vget_lane_p16(a, __b) __extension__ ({ \ + poly16x4_t __a = (a); \ + (poly16_t)__builtin_arm64_vget_lane_p16((int16x4_t)__a, __b); }) +#define vgetq_lane_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8_t)__builtin_arm64_vgetq_lane_s8(__a, __b); }) +#define vgetq_lane_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16_t)__builtin_arm64_vgetq_lane_s16(__a, __b); }) +#define vgetq_lane_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32_t)__builtin_arm64_vgetq_lane_s32(__a, __b); }) +#define vgetq_lane_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64_t)__builtin_arm64_vgetq_lane_s64(__a, __b); }) +#define vgetq_lane_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8_t)__builtin_arm64_vgetq_lane_u8((int8x16_t)__a, __b); }) +#define vgetq_lane_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16_t)__builtin_arm64_vgetq_lane_u16((int16x8_t)__a, __b); }) +#define vgetq_lane_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32_t)__builtin_arm64_vgetq_lane_u32((int32x4_t)__a, __b); }) +#define vgetq_lane_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64_t)__builtin_arm64_vgetq_lane_u64((int64x2_t)__a, __b); }) +#define vgetq_lane_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + (float32_t)__builtin_arm64_vgetq_lane_f32(__a, __b); }) +#define vgetq_lane_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + (float64_t)__builtin_arm64_vgetq_lane_f64(__a, __b); }) +#define vgetq_lane_p8(a, __b) __extension__ ({ \ + poly8x16_t __a = (a); \ + (poly8_t)__builtin_arm64_vgetq_lane_p8((int8x16_t)__a, __b); }) +#define vgetq_lane_p16(a, __b) __extension__ ({ \ + 
poly16x8_t __a = (a); \ + (poly16_t)__builtin_arm64_vgetq_lane_p16((int16x8_t)__a, __b); }) + +__ai int8x8_t vget_low_s8(int8x16_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } +__ai int16x4_t vget_low_s16(int16x8_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +__ai int32x2_t vget_low_s32(int32x4_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1); } +__ai int64x1_t vget_low_s64(int64x2_t __a) { + return __builtin_shufflevector(__a, __a, 0); } +__ai float16x4_t vget_low_f16(float16x8_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +__ai float32x2_t vget_low_f32(float32x4_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1); } +__ai float64x1_t vget_low_f64(float64x2_t __a) { + return __builtin_shufflevector(__a, __a, 0); } +__ai uint8x8_t vget_low_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } +__ai uint16x4_t vget_low_u16(uint16x8_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +__ai uint32x2_t vget_low_u32(uint32x4_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1); } +__ai uint64x1_t vget_low_u64(uint64x2_t __a) { + return __builtin_shufflevector(__a, __a, 0); } +__ai poly8x8_t vget_low_p8(poly8x16_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } +__ai poly16x4_t vget_low_p16(poly16x8_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } + +__ai int8x8_t vhadd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vhadd_v(__a, __b, 0); } +__ai int16x4_t vhadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vhadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vhadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai 
uint16x4_t vhadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vhadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vhaddq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vhaddq_v(__a, __b, 32); } +__ai int16x8_t vhaddq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vhaddq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vhaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vhaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vhaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai int8x8_t vhsub_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vhsub_v(__a, __b, 0); } +__ai int16x4_t vhsub_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vhsub_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vhsub_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vhsub_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vhsub_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vhsubq_s8(int8x16_t __a, int8x16_t __b) { 
+ return (int8x16_t)__builtin_arm64_vhsubq_v(__a, __b, 32); } +__ai int16x8_t vhsubq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vhsubq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vhsubq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vhsubq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vhsubq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +#define vld1q_u8(__a) __extension__ ({ \ + (uint8x16_t)__builtin_arm64_vld1q_v(__a, 48); }) +#define vld1q_u16(__a) __extension__ ({ \ + (uint16x8_t)__builtin_arm64_vld1q_v(__a, 49); }) +#define vld1q_u32(__a) __extension__ ({ \ + (uint32x4_t)__builtin_arm64_vld1q_v(__a, 50); }) +#define vld1q_u64(__a) __extension__ ({ \ + (uint64x2_t)__builtin_arm64_vld1q_v(__a, 51); }) +#define vld1q_s8(__a) __extension__ ({ \ + (int8x16_t)__builtin_arm64_vld1q_v(__a, 32); }) +#define vld1q_s16(__a) __extension__ ({ \ + (int16x8_t)__builtin_arm64_vld1q_v(__a, 33); }) +#define vld1q_s32(__a) __extension__ ({ \ + (int32x4_t)__builtin_arm64_vld1q_v(__a, 34); }) +#define vld1q_s64(__a) __extension__ ({ \ + (int64x2_t)__builtin_arm64_vld1q_v(__a, 35); }) +#define vld1q_f16(__a) __extension__ ({ \ + (float16x8_t)__builtin_arm64_vld1q_v(__a, 38); }) +#define vld1q_f32(__a) __extension__ ({ \ + (float32x4_t)__builtin_arm64_vld1q_v(__a, 39); }) +#define vld1q_f64(__a) __extension__ ({ \ + (float64x2_t)__builtin_arm64_vld1q_v(__a, 40); }) +#define vld1q_p8(__a) __extension__ ({ \ + (poly8x16_t)__builtin_arm64_vld1q_v(__a, 36); }) +#define vld1q_p16(__a) __extension__ ({ \ + (poly16x8_t)__builtin_arm64_vld1q_v(__a, 
37); }) +#define vld1_u8(__a) __extension__ ({ \ + (uint8x8_t)__builtin_arm64_vld1_v(__a, 16); }) +#define vld1_u16(__a) __extension__ ({ \ + (uint16x4_t)__builtin_arm64_vld1_v(__a, 17); }) +#define vld1_u32(__a) __extension__ ({ \ + (uint32x2_t)__builtin_arm64_vld1_v(__a, 18); }) +#define vld1_u64(__a) __extension__ ({ \ + (uint64x1_t)__builtin_arm64_vld1_v(__a, 19); }) +#define vld1_s8(__a) __extension__ ({ \ + (int8x8_t)__builtin_arm64_vld1_v(__a, 0); }) +#define vld1_s16(__a) __extension__ ({ \ + (int16x4_t)__builtin_arm64_vld1_v(__a, 1); }) +#define vld1_s32(__a) __extension__ ({ \ + (int32x2_t)__builtin_arm64_vld1_v(__a, 2); }) +#define vld1_s64(__a) __extension__ ({ \ + (int64x1_t)__builtin_arm64_vld1_v(__a, 3); }) +#define vld1_f16(__a) __extension__ ({ \ + (float16x4_t)__builtin_arm64_vld1_v(__a, 6); }) +#define vld1_f32(__a) __extension__ ({ \ + (float32x2_t)__builtin_arm64_vld1_v(__a, 7); }) +#define vld1_f64(__a) __extension__ ({ \ + (float64x1_t)__builtin_arm64_vld1_v(__a, 8); }) +#define vld1_p8(__a) __extension__ ({ \ + (poly8x8_t)__builtin_arm64_vld1_v(__a, 4); }) +#define vld1_p16(__a) __extension__ ({ \ + (poly16x4_t)__builtin_arm64_vld1_v(__a, 5); }) + +#define vld1q_dup_u8(__a) __extension__ ({ \ + (uint8x16_t)__builtin_arm64_vld1q_dup_v(__a, 48); }) +#define vld1q_dup_u16(__a) __extension__ ({ \ + (uint16x8_t)__builtin_arm64_vld1q_dup_v(__a, 49); }) +#define vld1q_dup_u32(__a) __extension__ ({ \ + (uint32x4_t)__builtin_arm64_vld1q_dup_v(__a, 50); }) +#define vld1q_dup_u64(__a) __extension__ ({ \ + (uint64x2_t)__builtin_arm64_vld1q_dup_v(__a, 51); }) +#define vld1q_dup_s8(__a) __extension__ ({ \ + (int8x16_t)__builtin_arm64_vld1q_dup_v(__a, 32); }) +#define vld1q_dup_s16(__a) __extension__ ({ \ + (int16x8_t)__builtin_arm64_vld1q_dup_v(__a, 33); }) +#define vld1q_dup_s32(__a) __extension__ ({ \ + (int32x4_t)__builtin_arm64_vld1q_dup_v(__a, 34); }) +#define vld1q_dup_s64(__a) __extension__ ({ \ + (int64x2_t)__builtin_arm64_vld1q_dup_v(__a, 35); }) 
+#define vld1q_dup_f16(__a) __extension__ ({ \ + (float16x8_t)__builtin_arm64_vld1q_dup_v(__a, 38); }) +#define vld1q_dup_f32(__a) __extension__ ({ \ + (float32x4_t)__builtin_arm64_vld1q_dup_v(__a, 39); }) +#define vld1q_dup_f64(__a) __extension__ ({ \ + (float64x2_t)__builtin_arm64_vld1q_dup_v(__a, 40); }) +#define vld1q_dup_p8(__a) __extension__ ({ \ + (poly8x16_t)__builtin_arm64_vld1q_dup_v(__a, 36); }) +#define vld1q_dup_p16(__a) __extension__ ({ \ + (poly16x8_t)__builtin_arm64_vld1q_dup_v(__a, 37); }) +#define vld1_dup_u8(__a) __extension__ ({ \ + (uint8x8_t)__builtin_arm64_vld1_dup_v(__a, 16); }) +#define vld1_dup_u16(__a) __extension__ ({ \ + (uint16x4_t)__builtin_arm64_vld1_dup_v(__a, 17); }) +#define vld1_dup_u32(__a) __extension__ ({ \ + (uint32x2_t)__builtin_arm64_vld1_dup_v(__a, 18); }) +#define vld1_dup_u64(__a) __extension__ ({ \ + (uint64x1_t)__builtin_arm64_vld1_dup_v(__a, 19); }) +#define vld1_dup_s8(__a) __extension__ ({ \ + (int8x8_t)__builtin_arm64_vld1_dup_v(__a, 0); }) +#define vld1_dup_s16(__a) __extension__ ({ \ + (int16x4_t)__builtin_arm64_vld1_dup_v(__a, 1); }) +#define vld1_dup_s32(__a) __extension__ ({ \ + (int32x2_t)__builtin_arm64_vld1_dup_v(__a, 2); }) +#define vld1_dup_s64(__a) __extension__ ({ \ + (int64x1_t)__builtin_arm64_vld1_dup_v(__a, 3); }) +#define vld1_dup_f16(__a) __extension__ ({ \ + (float16x4_t)__builtin_arm64_vld1_dup_v(__a, 6); }) +#define vld1_dup_f32(__a) __extension__ ({ \ + (float32x2_t)__builtin_arm64_vld1_dup_v(__a, 7); }) +#define vld1_dup_f64(__a) __extension__ ({ \ + (float64x1_t)__builtin_arm64_vld1_dup_v(__a, 8); }) +#define vld1_dup_p8(__a) __extension__ ({ \ + (poly8x8_t)__builtin_arm64_vld1_dup_v(__a, 4); }) +#define vld1_dup_p16(__a) __extension__ ({ \ + (poly16x4_t)__builtin_arm64_vld1_dup_v(__a, 5); }) + +#define vld1q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 48); }) +#define vld1q_lane_u16(__a, b, __c) 
__extension__ ({ \ + uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 49); }) +#define vld1q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 50); }) +#define vld1q_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 51); }) +#define vld1q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16_t __b = (b); \ + (int8x16_t)__builtin_arm64_vld1q_lane_v(__a, __b, __c, 32); }) +#define vld1q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8_t __b = (b); \ + (int16x8_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 33); }) +#define vld1q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4_t __b = (b); \ + (int32x4_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 34); }) +#define vld1q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2_t __b = (b); \ + (int64x2_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 35); }) +#define vld1q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8_t __b = (b); \ + (float16x8_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 38); }) +#define vld1q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4_t __b = (b); \ + (float32x4_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 39); }) +#define vld1q_lane_f64(__a, b, __c) __extension__ ({ \ + float64x2_t __b = (b); \ + (float64x2_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 40); }) +#define vld1q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 36); }) +#define vld1q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_arm64_vld1q_lane_v(__a, (int8x16_t)__b, __c, 37); }) +#define vld1_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, 
__c, 16); }) +#define vld1_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 17); }) +#define vld1_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 18); }) +#define vld1_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 19); }) +#define vld1_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8_t __b = (b); \ + (int8x8_t)__builtin_arm64_vld1_lane_v(__a, __b, __c, 0); }) +#define vld1_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4_t __b = (b); \ + (int16x4_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 1); }) +#define vld1_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2_t __b = (b); \ + (int32x2_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 2); }) +#define vld1_lane_s64(__a, b, __c) __extension__ ({ \ + int64x1_t __b = (b); \ + (int64x1_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 3); }) +#define vld1_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4_t __b = (b); \ + (float16x4_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 6); }) +#define vld1_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2_t __b = (b); \ + (float32x2_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 7); }) +#define vld1_lane_f64(__a, b, __c) __extension__ ({ \ + float64x1_t __b = (b); \ + (float64x1_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 8); }) +#define vld1_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 4); }) +#define vld1_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_arm64_vld1_lane_v(__a, (int8x8_t)__b, __c, 5); }) + +#define vld2q_u8(__a) __extension__ ({ \ + uint8x16x2_t r; __builtin_arm64_vld2q_v(&r, __a, 48); r; }) +#define vld2q_u16(__a) 
__extension__ ({ \ + uint16x8x2_t r; __builtin_arm64_vld2q_v(&r, __a, 49); r; }) +#define vld2q_u32(__a) __extension__ ({ \ + uint32x4x2_t r; __builtin_arm64_vld2q_v(&r, __a, 50); r; }) +#define vld2q_u64(__a) __extension__ ({ \ + uint64x2x2_t r; __builtin_arm64_vld2q_v(&r, __a, 51); r; }) +#define vld2q_s8(__a) __extension__ ({ \ + int8x16x2_t r; __builtin_arm64_vld2q_v(&r, __a, 32); r; }) +#define vld2q_s16(__a) __extension__ ({ \ + int16x8x2_t r; __builtin_arm64_vld2q_v(&r, __a, 33); r; }) +#define vld2q_s32(__a) __extension__ ({ \ + int32x4x2_t r; __builtin_arm64_vld2q_v(&r, __a, 34); r; }) +#define vld2q_s64(__a) __extension__ ({ \ + int64x2x2_t r; __builtin_arm64_vld2q_v(&r, __a, 35); r; }) +#define vld2q_f16(__a) __extension__ ({ \ + float16x8x2_t r; __builtin_arm64_vld2q_v(&r, __a, 38); r; }) +#define vld2q_f32(__a) __extension__ ({ \ + float32x4x2_t r; __builtin_arm64_vld2q_v(&r, __a, 39); r; }) +#define vld2q_f64(__a) __extension__ ({ \ + float64x2x2_t r; __builtin_arm64_vld2q_v(&r, __a, 40); r; }) +#define vld2q_p8(__a) __extension__ ({ \ + poly8x16x2_t r; __builtin_arm64_vld2q_v(&r, __a, 36); r; }) +#define vld2q_p16(__a) __extension__ ({ \ + poly16x8x2_t r; __builtin_arm64_vld2q_v(&r, __a, 37); r; }) +#define vld2_u8(__a) __extension__ ({ \ + uint8x8x2_t r; __builtin_arm64_vld2_v(&r, __a, 16); r; }) +#define vld2_u16(__a) __extension__ ({ \ + uint16x4x2_t r; __builtin_arm64_vld2_v(&r, __a, 17); r; }) +#define vld2_u32(__a) __extension__ ({ \ + uint32x2x2_t r; __builtin_arm64_vld2_v(&r, __a, 18); r; }) +#define vld2_u64(__a) __extension__ ({ \ + uint64x1x2_t r; __builtin_arm64_vld2_v(&r, __a, 19); r; }) +#define vld2_s8(__a) __extension__ ({ \ + int8x8x2_t r; __builtin_arm64_vld2_v(&r, __a, 0); r; }) +#define vld2_s16(__a) __extension__ ({ \ + int16x4x2_t r; __builtin_arm64_vld2_v(&r, __a, 1); r; }) +#define vld2_s32(__a) __extension__ ({ \ + int32x2x2_t r; __builtin_arm64_vld2_v(&r, __a, 2); r; }) +#define vld2_s64(__a) __extension__ ({ \ + int64x1x2_t 
r; __builtin_arm64_vld2_v(&r, __a, 3); r; }) +#define vld2_f16(__a) __extension__ ({ \ + float16x4x2_t r; __builtin_arm64_vld2_v(&r, __a, 6); r; }) +#define vld2_f32(__a) __extension__ ({ \ + float32x2x2_t r; __builtin_arm64_vld2_v(&r, __a, 7); r; }) +#define vld2_f64(__a) __extension__ ({ \ + float64x1x2_t r; __builtin_arm64_vld2_v(&r, __a, 8); r; }) +#define vld2_p8(__a) __extension__ ({ \ + poly8x8x2_t r; __builtin_arm64_vld2_v(&r, __a, 4); r; }) +#define vld2_p16(__a) __extension__ ({ \ + poly16x4x2_t r; __builtin_arm64_vld2_v(&r, __a, 5); r; }) + +#define vld2_dup_u8(__a) __extension__ ({ \ + uint8x8x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 16); r; }) +#define vld2_dup_u16(__a) __extension__ ({ \ + uint16x4x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 17); r; }) +#define vld2_dup_u32(__a) __extension__ ({ \ + uint32x2x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 18); r; }) +#define vld2_dup_u64(__a) __extension__ ({ \ + uint64x1x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 19); r; }) +#define vld2_dup_s8(__a) __extension__ ({ \ + int8x8x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 0); r; }) +#define vld2_dup_s16(__a) __extension__ ({ \ + int16x4x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 1); r; }) +#define vld2_dup_s32(__a) __extension__ ({ \ + int32x2x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 2); r; }) +#define vld2_dup_s64(__a) __extension__ ({ \ + int64x1x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 3); r; }) +#define vld2_dup_f16(__a) __extension__ ({ \ + float16x4x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 6); r; }) +#define vld2_dup_f32(__a) __extension__ ({ \ + float32x2x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 7); r; }) +#define vld2_dup_f64(__a) __extension__ ({ \ + float64x1x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 8); r; }) +#define vld2_dup_p8(__a) __extension__ ({ \ + poly8x8x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 4); r; }) +#define vld2_dup_p16(__a) __extension__ ({ \ + poly16x4x2_t r; __builtin_arm64_vld2_dup_v(&r, __a, 5); r; }) + +#define 
vld2q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16x2_t __b = (b); \ + uint8x16x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 48); r; }) +#define vld2q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x2_t __b = (b); \ + uint16x8x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); r; }) +#define vld2q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x2_t __b = (b); \ + uint32x4x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); r; }) +#define vld2q_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x2x2_t __b = (b); \ + uint64x2x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 51); r; }) +#define vld2q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16x2_t __b = (b); \ + int8x16x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, __b.val[0], __b.val[1], __c, 32); r; }) +#define vld2q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x2_t __b = (b); \ + int16x8x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); r; }) +#define vld2q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x2_t __b = (b); \ + int32x4x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); r; }) +#define vld2q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2x2_t __b = (b); \ + int64x2x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 35); r; }) +#define vld2q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x2_t __b = (b); \ + float16x8x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); r; }) +#define vld2q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x2_t __b = (b); \ + float32x4x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); r; }) +#define 
vld2q_lane_f64(__a, b, __c) __extension__ ({ \ + float64x2x2_t __b = (b); \ + float64x2x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 40); r; }) +#define vld2q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16x2_t __b = (b); \ + poly8x16x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 36); r; }) +#define vld2q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x2_t __b = (b); \ + poly16x8x2_t r; __builtin_arm64_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); r; }) +#define vld2_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x2_t __b = (b); \ + uint8x8x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); r; }) +#define vld2_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x2_t __b = (b); \ + uint16x4x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); r; }) +#define vld2_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x2_t __b = (b); \ + uint32x2x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); r; }) +#define vld2_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x1x2_t __b = (b); \ + uint64x1x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 19); r; }) +#define vld2_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x2_t __b = (b); \ + int8x8x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, __b.val[0], __b.val[1], __c, 0); r; }) +#define vld2_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x2_t __b = (b); \ + int16x4x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); r; }) +#define vld2_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x2_t __b = (b); \ + int32x2x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); r; }) +#define vld2_lane_s64(__a, b, __c) __extension__ 
({ \ + int64x1x2_t __b = (b); \ + int64x1x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 3); r; }) +#define vld2_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x2_t __b = (b); \ + float16x4x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); r; }) +#define vld2_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x2_t __b = (b); \ + float32x2x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); r; }) +#define vld2_lane_f64(__a, b, __c) __extension__ ({ \ + float64x1x2_t __b = (b); \ + float64x1x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 8); r; }) +#define vld2_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x2_t __b = (b); \ + poly8x8x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); r; }) +#define vld2_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x2_t __b = (b); \ + poly16x4x2_t r; __builtin_arm64_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); r; }) + +#define vld3q_u8(__a) __extension__ ({ \ + uint8x16x3_t r; __builtin_arm64_vld3q_v(&r, __a, 48); r; }) +#define vld3q_u16(__a) __extension__ ({ \ + uint16x8x3_t r; __builtin_arm64_vld3q_v(&r, __a, 49); r; }) +#define vld3q_u32(__a) __extension__ ({ \ + uint32x4x3_t r; __builtin_arm64_vld3q_v(&r, __a, 50); r; }) +#define vld3q_u64(__a) __extension__ ({ \ + uint64x2x3_t r; __builtin_arm64_vld3q_v(&r, __a, 51); r; }) +#define vld3q_s8(__a) __extension__ ({ \ + int8x16x3_t r; __builtin_arm64_vld3q_v(&r, __a, 32); r; }) +#define vld3q_s16(__a) __extension__ ({ \ + int16x8x3_t r; __builtin_arm64_vld3q_v(&r, __a, 33); r; }) +#define vld3q_s32(__a) __extension__ ({ \ + int32x4x3_t r; __builtin_arm64_vld3q_v(&r, __a, 34); r; }) +#define vld3q_s64(__a) __extension__ ({ \ + int64x2x3_t r; __builtin_arm64_vld3q_v(&r, __a, 35); r; }) +#define vld3q_f16(__a) 
__extension__ ({ \ + float16x8x3_t r; __builtin_arm64_vld3q_v(&r, __a, 38); r; }) +#define vld3q_f32(__a) __extension__ ({ \ + float32x4x3_t r; __builtin_arm64_vld3q_v(&r, __a, 39); r; }) +#define vld3q_f64(__a) __extension__ ({ \ + float64x2x3_t r; __builtin_arm64_vld3q_v(&r, __a, 40); r; }) +#define vld3q_p8(__a) __extension__ ({ \ + poly8x16x3_t r; __builtin_arm64_vld3q_v(&r, __a, 36); r; }) +#define vld3q_p16(__a) __extension__ ({ \ + poly16x8x3_t r; __builtin_arm64_vld3q_v(&r, __a, 37); r; }) +#define vld3_u8(__a) __extension__ ({ \ + uint8x8x3_t r; __builtin_arm64_vld3_v(&r, __a, 16); r; }) +#define vld3_u16(__a) __extension__ ({ \ + uint16x4x3_t r; __builtin_arm64_vld3_v(&r, __a, 17); r; }) +#define vld3_u32(__a) __extension__ ({ \ + uint32x2x3_t r; __builtin_arm64_vld3_v(&r, __a, 18); r; }) +#define vld3_u64(__a) __extension__ ({ \ + uint64x1x3_t r; __builtin_arm64_vld3_v(&r, __a, 19); r; }) +#define vld3_s8(__a) __extension__ ({ \ + int8x8x3_t r; __builtin_arm64_vld3_v(&r, __a, 0); r; }) +#define vld3_s16(__a) __extension__ ({ \ + int16x4x3_t r; __builtin_arm64_vld3_v(&r, __a, 1); r; }) +#define vld3_s32(__a) __extension__ ({ \ + int32x2x3_t r; __builtin_arm64_vld3_v(&r, __a, 2); r; }) +#define vld3_s64(__a) __extension__ ({ \ + int64x1x3_t r; __builtin_arm64_vld3_v(&r, __a, 3); r; }) +#define vld3_f16(__a) __extension__ ({ \ + float16x4x3_t r; __builtin_arm64_vld3_v(&r, __a, 6); r; }) +#define vld3_f32(__a) __extension__ ({ \ + float32x2x3_t r; __builtin_arm64_vld3_v(&r, __a, 7); r; }) +#define vld3_f64(__a) __extension__ ({ \ + float64x1x3_t r; __builtin_arm64_vld3_v(&r, __a, 8); r; }) +#define vld3_p8(__a) __extension__ ({ \ + poly8x8x3_t r; __builtin_arm64_vld3_v(&r, __a, 4); r; }) +#define vld3_p16(__a) __extension__ ({ \ + poly16x4x3_t r; __builtin_arm64_vld3_v(&r, __a, 5); r; }) + +#define vld3_dup_u8(__a) __extension__ ({ \ + uint8x8x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 16); r; }) +#define vld3_dup_u16(__a) __extension__ ({ \ + uint16x4x3_t 
r; __builtin_arm64_vld3_dup_v(&r, __a, 17); r; }) +#define vld3_dup_u32(__a) __extension__ ({ \ + uint32x2x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 18); r; }) +#define vld3_dup_u64(__a) __extension__ ({ \ + uint64x1x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 19); r; }) +#define vld3_dup_s8(__a) __extension__ ({ \ + int8x8x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 0); r; }) +#define vld3_dup_s16(__a) __extension__ ({ \ + int16x4x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 1); r; }) +#define vld3_dup_s32(__a) __extension__ ({ \ + int32x2x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 2); r; }) +#define vld3_dup_s64(__a) __extension__ ({ \ + int64x1x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 3); r; }) +#define vld3_dup_f16(__a) __extension__ ({ \ + float16x4x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 6); r; }) +#define vld3_dup_f32(__a) __extension__ ({ \ + float32x2x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 7); r; }) +#define vld3_dup_f64(__a) __extension__ ({ \ + float64x1x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 8); r; }) +#define vld3_dup_p8(__a) __extension__ ({ \ + poly8x8x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 4); r; }) +#define vld3_dup_p16(__a) __extension__ ({ \ + poly16x4x3_t r; __builtin_arm64_vld3_dup_v(&r, __a, 5); r; }) + +#define vld3q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16x3_t __b = (b); \ + uint8x16x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 48); r; }) +#define vld3q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x3_t __b = (b); \ + uint16x8x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); r; }) +#define vld3q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x3_t __b = (b); \ + uint32x4x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); r; }) +#define vld3q_lane_u64(__a, b, __c) __extension__ ({ \ + 
uint64x2x3_t __b = (b); \ + uint64x2x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 51); r; }) +#define vld3q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16x3_t __b = (b); \ + int8x16x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __c, 32); r; }) +#define vld3q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x3_t __b = (b); \ + int16x8x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); r; }) +#define vld3q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x3_t __b = (b); \ + int32x4x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); r; }) +#define vld3q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2x3_t __b = (b); \ + int64x2x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 35); r; }) +#define vld3q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x3_t __b = (b); \ + float16x8x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); r; }) +#define vld3q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x3_t __b = (b); \ + float32x4x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); r; }) +#define vld3q_lane_f64(__a, b, __c) __extension__ ({ \ + float64x2x3_t __b = (b); \ + float64x2x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 40); r; }) +#define vld3q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16x3_t __b = (b); \ + poly8x16x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 36); r; }) +#define vld3q_lane_p16(__a, b, __c) __extension__ ({ \ + 
poly16x8x3_t __b = (b); \ + poly16x8x3_t r; __builtin_arm64_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); r; }) +#define vld3_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x3_t __b = (b); \ + uint8x8x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); r; }) +#define vld3_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x3_t __b = (b); \ + uint16x4x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); r; }) +#define vld3_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x3_t __b = (b); \ + uint32x2x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); r; }) +#define vld3_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x1x3_t __b = (b); \ + uint64x1x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 19); r; }) +#define vld3_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x3_t __b = (b); \ + int8x8x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __c, 0); r; }) +#define vld3_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x3_t __b = (b); \ + int16x4x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); r; }) +#define vld3_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x3_t __b = (b); \ + int32x2x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); r; }) +#define vld3_lane_s64(__a, b, __c) __extension__ ({ \ + int64x1x3_t __b = (b); \ + int64x1x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 3); r; }) +#define vld3_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x3_t __b = (b); \ + float16x4x3_t r; 
__builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); r; }) +#define vld3_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x3_t __b = (b); \ + float32x2x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); r; }) +#define vld3_lane_f64(__a, b, __c) __extension__ ({ \ + float64x1x3_t __b = (b); \ + float64x1x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 8); r; }) +#define vld3_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x3_t __b = (b); \ + poly8x8x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); r; }) +#define vld3_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x3_t __b = (b); \ + poly16x4x3_t r; __builtin_arm64_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); r; }) + +#define vld4q_u8(__a) __extension__ ({ \ + uint8x16x4_t r; __builtin_arm64_vld4q_v(&r, __a, 48); r; }) +#define vld4q_u16(__a) __extension__ ({ \ + uint16x8x4_t r; __builtin_arm64_vld4q_v(&r, __a, 49); r; }) +#define vld4q_u32(__a) __extension__ ({ \ + uint32x4x4_t r; __builtin_arm64_vld4q_v(&r, __a, 50); r; }) +#define vld4q_u64(__a) __extension__ ({ \ + uint64x2x4_t r; __builtin_arm64_vld4q_v(&r, __a, 51); r; }) +#define vld4q_s8(__a) __extension__ ({ \ + int8x16x4_t r; __builtin_arm64_vld4q_v(&r, __a, 32); r; }) +#define vld4q_s16(__a) __extension__ ({ \ + int16x8x4_t r; __builtin_arm64_vld4q_v(&r, __a, 33); r; }) +#define vld4q_s32(__a) __extension__ ({ \ + int32x4x4_t r; __builtin_arm64_vld4q_v(&r, __a, 34); r; }) +#define vld4q_s64(__a) __extension__ ({ \ + int64x2x4_t r; __builtin_arm64_vld4q_v(&r, __a, 35); r; }) +#define vld4q_f16(__a) __extension__ ({ \ + float16x8x4_t r; __builtin_arm64_vld4q_v(&r, __a, 38); r; }) +#define vld4q_f32(__a) __extension__ ({ \ + float32x4x4_t r; 
__builtin_arm64_vld4q_v(&r, __a, 39); r; }) +#define vld4q_f64(__a) __extension__ ({ \ + float64x2x4_t r; __builtin_arm64_vld4q_v(&r, __a, 40); r; }) +#define vld4q_p8(__a) __extension__ ({ \ + poly8x16x4_t r; __builtin_arm64_vld4q_v(&r, __a, 36); r; }) +#define vld4q_p16(__a) __extension__ ({ \ + poly16x8x4_t r; __builtin_arm64_vld4q_v(&r, __a, 37); r; }) +#define vld4_u8(__a) __extension__ ({ \ + uint8x8x4_t r; __builtin_arm64_vld4_v(&r, __a, 16); r; }) +#define vld4_u16(__a) __extension__ ({ \ + uint16x4x4_t r; __builtin_arm64_vld4_v(&r, __a, 17); r; }) +#define vld4_u32(__a) __extension__ ({ \ + uint32x2x4_t r; __builtin_arm64_vld4_v(&r, __a, 18); r; }) +#define vld4_u64(__a) __extension__ ({ \ + uint64x1x4_t r; __builtin_arm64_vld4_v(&r, __a, 19); r; }) +#define vld4_s8(__a) __extension__ ({ \ + int8x8x4_t r; __builtin_arm64_vld4_v(&r, __a, 0); r; }) +#define vld4_s16(__a) __extension__ ({ \ + int16x4x4_t r; __builtin_arm64_vld4_v(&r, __a, 1); r; }) +#define vld4_s32(__a) __extension__ ({ \ + int32x2x4_t r; __builtin_arm64_vld4_v(&r, __a, 2); r; }) +#define vld4_s64(__a) __extension__ ({ \ + int64x1x4_t r; __builtin_arm64_vld4_v(&r, __a, 3); r; }) +#define vld4_f16(__a) __extension__ ({ \ + float16x4x4_t r; __builtin_arm64_vld4_v(&r, __a, 6); r; }) +#define vld4_f32(__a) __extension__ ({ \ + float32x2x4_t r; __builtin_arm64_vld4_v(&r, __a, 7); r; }) +#define vld4_f64(__a) __extension__ ({ \ + float64x1x4_t r; __builtin_arm64_vld4_v(&r, __a, 8); r; }) +#define vld4_p8(__a) __extension__ ({ \ + poly8x8x4_t r; __builtin_arm64_vld4_v(&r, __a, 4); r; }) +#define vld4_p16(__a) __extension__ ({ \ + poly16x4x4_t r; __builtin_arm64_vld4_v(&r, __a, 5); r; }) + +#define vld4_dup_u8(__a) __extension__ ({ \ + uint8x8x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 16); r; }) +#define vld4_dup_u16(__a) __extension__ ({ \ + uint16x4x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 17); r; }) +#define vld4_dup_u32(__a) __extension__ ({ \ + uint32x2x4_t r; __builtin_arm64_vld4_dup_v(&r, 
__a, 18); r; }) +#define vld4_dup_u64(__a) __extension__ ({ \ + uint64x1x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 19); r; }) +#define vld4_dup_s8(__a) __extension__ ({ \ + int8x8x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 0); r; }) +#define vld4_dup_s16(__a) __extension__ ({ \ + int16x4x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 1); r; }) +#define vld4_dup_s32(__a) __extension__ ({ \ + int32x2x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 2); r; }) +#define vld4_dup_s64(__a) __extension__ ({ \ + int64x1x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 3); r; }) +#define vld4_dup_f16(__a) __extension__ ({ \ + float16x4x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 6); r; }) +#define vld4_dup_f32(__a) __extension__ ({ \ + float32x2x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 7); r; }) +#define vld4_dup_f64(__a) __extension__ ({ \ + float64x1x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 8); r; }) +#define vld4_dup_p8(__a) __extension__ ({ \ + poly8x8x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 4); r; }) +#define vld4_dup_p16(__a) __extension__ ({ \ + poly16x4x4_t r; __builtin_arm64_vld4_dup_v(&r, __a, 5); r; }) + +#define vld4q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16x4_t __b = (b); \ + uint8x16x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 48); r; }) +#define vld4q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x4_t __b = (b); \ + uint16x8x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); r; }) +#define vld4q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x4_t __b = (b); \ + uint32x4x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); r; }) +#define vld4q_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x2x4_t __b = (b); \ + uint64x2x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, 
(int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 51); r; }) +#define vld4q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16x4_t __b = (b); \ + int8x16x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 32); r; }) +#define vld4q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x4_t __b = (b); \ + int16x8x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); r; }) +#define vld4q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x4_t __b = (b); \ + int32x4x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); r; }) +#define vld4q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2x4_t __b = (b); \ + int64x2x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 35); r; }) +#define vld4q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x4_t __b = (b); \ + float16x8x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); r; }) +#define vld4q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x4_t __b = (b); \ + float32x4x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); r; }) +#define vld4q_lane_f64(__a, b, __c) __extension__ ({ \ + float64x2x4_t __b = (b); \ + float64x2x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 40); r; }) +#define vld4q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16x4_t __b = (b); \ + poly8x16x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 
(int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 36); r; }) +#define vld4q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x4_t __b = (b); \ + poly16x8x4_t r; __builtin_arm64_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); r; }) +#define vld4_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x4_t __b = (b); \ + uint8x8x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); r; }) +#define vld4_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x4_t __b = (b); \ + uint16x4x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); r; }) +#define vld4_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x4_t __b = (b); \ + uint32x2x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); r; }) +#define vld4_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x1x4_t __b = (b); \ + uint64x1x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 19); r; }) +#define vld4_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x4_t __b = (b); \ + int8x8x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); r; }) +#define vld4_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x4_t __b = (b); \ + int16x4x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); r; }) +#define vld4_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x4_t __b = (b); \ + int32x2x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); r; }) +#define vld4_lane_s64(__a, b, __c) __extension__ 
({ \ + int64x1x4_t __b = (b); \ + int64x1x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 3); r; }) +#define vld4_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x4_t __b = (b); \ + float16x4x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); r; }) +#define vld4_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x4_t __b = (b); \ + float32x2x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); r; }) +#define vld4_lane_f64(__a, b, __c) __extension__ ({ \ + float64x1x4_t __b = (b); \ + float64x1x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 8); r; }) +#define vld4_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x4_t __b = (b); \ + poly8x8x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); r; }) +#define vld4_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x4_t __b = (b); \ + poly16x4x4_t r; __builtin_arm64_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); r; }) + +__ai int8x8_t vmax_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vmax_v(__a, __b, 0); } +__ai int16x4_t vmax_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vmax_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vmax_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vmax_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vmax_u16(uint16x4_t __a, uint16x4_t __b) { + return 
(uint16x4_t)__builtin_arm64_vmax_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vmax_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vmax_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vmax_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vmax_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vmaxq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vmaxq_v(__a, __b, 32); } +__ai int16x8_t vmaxq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vmaxq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vmaxq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vmaxq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vmaxq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vmaxq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vmaxq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float32x2_t vmaxnm_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vmaxnm_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vmaxnmq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vmaxnmq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float32x2_t vpmaxnm_f32(float32x2_t __a, 
float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vpmaxnm_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vpmaxnmq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vpmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpmaxnmq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vpmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x8_t vmin_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vmin_v(__a, __b, 0); } +__ai int16x4_t vmin_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vmin_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vmin_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vmin_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vmin_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vmin_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vmin_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vmin_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vmin_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vmin_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vminq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vminq_v(__a, __b, 32); } +__ai int16x8_t vminq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vminq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vminq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vminq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vminq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vminq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vminq_u16(uint16x8_t __a, uint16x8_t __b) { + return 
(uint16x8_t)__builtin_arm64_vminq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vminq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vminq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vminq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vminq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vminq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vminq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float32x2_t vminnm_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vminnm_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vminnmq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vminnmq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vminnmq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vminnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float32x2_t vpminnm_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vpminnm_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vpminnmq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vpminnmq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpminnmq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vpminnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x8_t vmla_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a + (__b * __c); } +__ai int16x4_t vmla_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a + (__b * __c); } +__ai int32x2_t vmla_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a + (__b * __c); } +__ai float32x2_t vmla_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { + return __a + (__b * __c); } +__ai uint8x8_t vmla_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a + (__b * __c); } +__ai uint16x4_t vmla_u16(uint16x4_t __a, 
uint16x4_t __b, uint16x4_t __c) { + return __a + (__b * __c); } +__ai uint32x2_t vmla_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a + (__b * __c); } +__ai int8x16_t vmlaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { + return __a + (__b * __c); } +__ai int16x8_t vmlaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { + return __a + (__b * __c); } +__ai int32x4_t vmlaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { + return __a + (__b * __c); } +__ai float32x4_t vmlaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { + return __a + (__b * __c); } +__ai float64x2_t vmlaq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) { + return __a + (__b * __c); } +__ai uint8x16_t vmlaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return __a + (__b * __c); } +__ai uint16x8_t vmlaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return __a + (__b * __c); } +__ai uint32x4_t vmlaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return __a + (__b * __c); } + +__ai int16x8_t vmlal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a + vmull_s8(__b, __c); } +__ai int32x4_t vmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a + vmull_s16(__b, __c); } +__ai int64x2_t vmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a + vmull_s32(__b, __c); } +__ai uint16x8_t vmlal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a + vmull_u8(__b, __c); } +__ai uint32x4_t vmlal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a + vmull_u16(__b, __c); } +__ai uint64x2_t vmlal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a + vmull_u32(__b, __c); } + +__ai int16x8_t vmlal_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) { + return __a + vmull_high_s8(__b, __c); } +__ai int32x4_t vmlal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return __a + vmull_high_s16(__b, __c); } +__ai int64x2_t vmlal_high_s32(int64x2_t __a, int32x4_t __b, 
int32x4_t __c) { + return __a + vmull_high_s32(__b, __c); } +__ai uint16x8_t vmlal_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) { + return __a + vmull_high_u8(__b, __c); } +__ai uint32x4_t vmlal_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { + return __a + vmull_high_u16(__b, __c); } +__ai uint64x2_t vmlal_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { + return __a + vmull_high_u32(__b, __c); } + +#define vmlal_high_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + __a + vmull_high_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlal_high_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + __a + vmull_high_s32(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_high_lane_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ + __a + vmull_high_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlal_high_lane_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ + __a + vmull_high_u32(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +#define vmlal_high_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + __a + vmull_high_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlal_high_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + __a + vmull_high_s32(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_high_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x8_t __b = (b); uint16x8_t __c = (c); \ + __a + vmull_high_u16(__b, __builtin_shufflevector(__c, __c, 
__d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlal_high_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x4_t __b = (b); uint32x4_t __c = (c); \ + __a + vmull_high_u32(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +__ai int32x4_t vmlal_high_n_s16(int32x4_t __a, int16x8_t __b, int16_t __c) { + return __a + vmull_high_s16(__b, (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai int64x2_t vmlal_high_n_s32(int64x2_t __a, int32x4_t __b, int32_t __c) { + return __a + vmull_high_s32(__b, (int32x4_t){ __c, __c, __c, __c }); } +__ai uint32x4_t vmlal_high_n_u16(uint32x4_t __a, uint16x8_t __b, uint16_t __c) { + return __a + vmull_high_u16(__b, (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai uint64x2_t vmlal_high_n_u32(uint64x2_t __a, uint32x4_t __b, uint32_t __c) { + return __a + vmull_high_u32(__b, (uint32x4_t){ __c, __c, __c, __c }); } + +#define vmlal_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + __a + vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + __a + vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlal_lane_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ + __a + vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_lane_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ + __a + vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vmlal_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + __a + vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_laneq_s32(a, b, c, __d) __extension__ ({ \ 
+ int64x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + __a + vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlal_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x8_t __c = (c); \ + __a + vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x4_t __c = (c); \ + __a + vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { + return __a + vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); } +__ai int64x2_t vmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { + return __a + vmull_s32(__b, (int32x2_t){ __c, __c }); } +__ai uint32x4_t vmlal_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) { + return __a + vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); } +__ai uint64x2_t vmlal_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) { + return __a + vmull_u32(__b, (uint32x2_t){ __c, __c }); } + +#define vmla_lane_s16(a, b, c, __d) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmla_lane_s32(a, b, c, __d) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmla_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmla_lane_u16(a, b, c, __d) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmla_lane_u32(a, b, c, __d) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t 
__c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlaq_lane_s16(a, b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlaq_lane_s32(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlaq_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlaq_lane_u16(a, b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlaq_lane_u32(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +#define vmla_laneq_s16(a, b, c, __d) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmla_laneq_s32(a, b, c, __d) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmla_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x8_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmla_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlaq_laneq_s16(a, b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = 
(b); int16x8_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlaq_laneq_s32(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlaq_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x8_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlaq_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +__ai int16x4_t vmla_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) { + return __a + (__b * (int16x4_t){ __c, __c, __c, __c }); } +__ai int32x2_t vmla_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) { + return __a + (__b * (int32x2_t){ __c, __c }); } +__ai float32x2_t vmla_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { + return __a + (__b * (float32x2_t){ __c, __c }); } +__ai uint16x4_t vmla_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) { + return __a + (__b * (uint16x4_t){ __c, __c, __c, __c }); } +__ai uint32x2_t vmla_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) { + return __a + (__b * (uint32x2_t){ __c, __c }); } +__ai int16x8_t vmlaq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) { + return __a + (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai int32x4_t vmlaq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) { + return __a + (__b * (int32x4_t){ __c, __c, __c, __c }); } +__ai uint16x8_t vmlaq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) { + return __a + (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai uint32x4_t vmlaq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { + return __a + (__b * (uint32x4_t){ __c, __c, __c, __c }); } +__ai float32x4_t 
vmlaq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { + return __a + (__b * (float32x4_t){ __c, __c, __c, __c }); } +__ai float64x2_t vmlaq_n_f64(float64x2_t __a, float64x2_t __b, float64_t __c) { + return __a + (__b * (float64x2_t){ __c, __c }); } + +__ai int8x8_t vmls_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a - (__b * __c); } +__ai int16x4_t vmls_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a - (__b * __c); } +__ai int32x2_t vmls_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a - (__b * __c); } +__ai float32x2_t vmls_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { + return __a - (__b * __c); } +__ai uint8x8_t vmls_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a - (__b * __c); } +__ai uint16x4_t vmls_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a - (__b * __c); } +__ai uint32x2_t vmls_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a - (__b * __c); } +__ai int8x16_t vmlsq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { + return __a - (__b * __c); } +__ai int16x8_t vmlsq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { + return __a - (__b * __c); } +__ai int32x4_t vmlsq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { + return __a - (__b * __c); } +__ai float32x4_t vmlsq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { + return __a - (__b * __c); } +__ai float64x2_t vmlsq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) { + return __a - (__b * __c); } +__ai uint8x16_t vmlsq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return __a - (__b * __c); } +__ai uint16x8_t vmlsq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return __a - (__b * __c); } +__ai uint32x4_t vmlsq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return __a - (__b * __c); } + +__ai int16x8_t vmlsl_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a - vmull_s8(__b, __c); } +__ai int32x4_t vmlsl_s16(int32x4_t __a, int16x4_t __b, 
int16x4_t __c) { + return __a - vmull_s16(__b, __c); } +__ai int64x2_t vmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a - vmull_s32(__b, __c); } +__ai uint16x8_t vmlsl_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a - vmull_u8(__b, __c); } +__ai uint32x4_t vmlsl_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a - vmull_u16(__b, __c); } +__ai uint64x2_t vmlsl_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a - vmull_u32(__b, __c); } + /* vmlsl_high_*: each returns __a - vmull_high_*(__b, __c), delegating the multiply to the matching vmull_high_* intrinsic. */ +__ai int16x8_t vmlsl_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) { + return __a - vmull_high_s8(__b, __c); } +__ai int32x4_t vmlsl_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return __a - vmull_high_s16(__b, __c); } +__ai int64x2_t vmlsl_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return __a - vmull_high_s32(__b, __c); } +__ai uint16x8_t vmlsl_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) { + return __a - vmull_high_u8(__b, __c); } +__ai uint32x4_t vmlsl_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { + return __a - vmull_high_u16(__b, __c); } +__ai uint64x2_t vmlsl_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { + return __a - vmull_high_u32(__b, __c); } + /* vmlsl_high_lane_*: statement-expression macros. Each copies a, b, c into typed locals (so every argument is evaluated once), shuffles __c with every index equal to __d, and yields __a - vmull_high_*(__b, shuffled __c). __d is used as a shuffle index, so it must be a constant expression. */ +#define vmlsl_high_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + __a - vmull_high_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsl_high_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + __a - vmull_high_s32(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_high_lane_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ + __a - vmull_high_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsl_high_lane_u32(a, b, c, __d) __extension__ ({ \ + 
uint64x2_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ + __a - vmull_high_u32(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + /* vmlsl_high_laneq_*: statement-expression macros yielding __a - vmull_high_*(__b, shuffle of __c with every index equal to __d); here __c is declared as int16x8_t / int32x4_t / uint16x8_t / uint32x4_t. */ +#define vmlsl_high_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + __a - vmull_high_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsl_high_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + __a - vmull_high_s32(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_high_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x8_t __b = (b); uint16x8_t __c = (c); \ + __a - vmull_high_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsl_high_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x4_t __b = (b); uint32x4_t __c = (c); \ + __a - vmull_high_u32(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + /* vmlsl_high_n_*: each builds a vector literal holding the scalar __c in every element and returns __a - vmull_high_*(__b, that vector). */ +__ai int32x4_t vmlsl_high_n_s16(int32x4_t __a, int16x8_t __b, int16_t __c) { + return __a - vmull_high_s16(__b, (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai int64x2_t vmlsl_high_n_s32(int64x2_t __a, int32x4_t __b, int32_t __c) { + return __a - vmull_high_s32(__b, (int32x4_t){ __c, __c, __c, __c }); } +__ai uint32x4_t vmlsl_high_n_u16(uint32x4_t __a, uint16x8_t __b, uint16_t __c) { + return __a - vmull_high_u16(__b, (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai uint64x2_t vmlsl_high_n_u32(uint64x2_t __a, uint32x4_t __b, uint32_t __c) { + return __a - vmull_high_u32(__b, (uint32x4_t){ __c, __c, __c, __c }); } + /* vmlsl_lane_*: statement-expression macros yielding __a - vmull_*(__b, shuffle of __c with every index equal to __d); a, b and c are each copied into a typed local first. */ +#define vmlsl_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + __a - vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_lane_s32(a, b, c, __d) __extension__ ({ \ + 
int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + __a - vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsl_lane_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ + __a - vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_lane_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ + __a - vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) + /* vmlsl_laneq_*: statement-expression macros yielding __a - vmull_*(__b, shuffle of __c with every index equal to __d); __c is declared as int16x8_t / int32x4_t / uint16x8_t / uint32x4_t while the shuffle result has as many elements as __b. */ +#define vmlsl_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + __a - vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + __a - vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsl_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x8_t __c = (c); \ + __a - vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x4_t __c = (c); \ + __a - vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) + /* vmlsl_n_*: each builds a vector literal holding the scalar __c in every element and returns __a - vmull_*(__b, that vector). */ +__ai int32x4_t vmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { + return __a - vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); } +__ai int64x2_t vmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { + return __a - vmull_s32(__b, (int32x2_t){ __c, __c }); } +__ai uint32x4_t vmlsl_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) { + return __a - vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); } +__ai uint64x2_t vmlsl_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) { + return __a - vmull_u32(__b, (uint32x2_t){ __c, __c }); } + /* vmls_lane_* and vmlsq_lane_*: statement-expression macros yielding __a - (__b * shuffle of __c with every index equal to __d); the shuffle produces as many elements as __a and __b have. */ +#define vmls_lane_s16(a, b, c, __d) __extension__ ({ \ + int16x4_t __a = (a); 
int16x4_t __b = (b); int16x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmls_lane_s32(a, b, c, __d) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmls_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmls_lane_u16(a, b, c, __d) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmls_lane_u32(a, b, c, __d) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsq_lane_s16(a, b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsq_lane_s32(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsq_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsq_lane_u16(a, b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsq_lane_u32(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + /* vmls_laneq_* and vmlsq_laneq_*: statement-expression macros yielding __a - (__b * shuffle of __c with every index equal to __d); __c is declared as int16x8_t / int32x4_t / uint16x8_t / uint32x4_t. */ +#define vmls_laneq_s16(a, b, c, __d) __extension__ ({ \ + 
int16x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmls_laneq_s32(a, b, c, __d) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmls_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x8_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmls_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsq_laneq_s16(a, b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsq_laneq_s32(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsq_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x8_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsq_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + /* vmls_n_* and vmlsq_n_*: each returns __a - (__b * v), where v is a vector literal of __a's type holding the scalar __c in every element. */ +__ai int16x4_t vmls_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) { + return __a - (__b * (int16x4_t){ __c, __c, __c, __c }); } +__ai int32x2_t vmls_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) { + return __a - (__b * (int32x2_t){ __c, __c }); } +__ai float32x2_t vmls_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { + return __a - (__b * (float32x2_t){ __c, __c }); } +__ai uint16x4_t vmls_n_u16(uint16x4_t __a, 
uint16x4_t __b, uint16_t __c) { + return __a - (__b * (uint16x4_t){ __c, __c, __c, __c }); } +__ai uint32x2_t vmls_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) { + return __a - (__b * (uint32x2_t){ __c, __c }); } +__ai int16x8_t vmlsq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) { + return __a - (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai int32x4_t vmlsq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) { + return __a - (__b * (int32x4_t){ __c, __c, __c, __c }); } +__ai uint16x8_t vmlsq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) { + return __a - (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai uint32x4_t vmlsq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { + return __a - (__b * (uint32x4_t){ __c, __c, __c, __c }); } +__ai float32x4_t vmlsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { + return __a - (__b * (float32x4_t){ __c, __c, __c, __c }); } +__ai float64x2_t vmlsq_n_f64(float64x2_t __a, float64x2_t __b, float64_t __c) { + return __a - (__b * (float64x2_t){ __c, __c }); } + /* vmovn_*: each casts its argument to int8x16_t, calls __builtin_arm64_vmovn_v, and casts the result to the declared return type. The trailing integer literal differs per variant -- presumably a type selector for the generic builtin; confirm against the compiler's NEON builtin definitions. */ +__ai int8x8_t vmovn_s16(int16x8_t __a) { + return (int8x8_t)__builtin_arm64_vmovn_v((int8x16_t)__a, 0); } +__ai int16x4_t vmovn_s32(int32x4_t __a) { + return (int16x4_t)__builtin_arm64_vmovn_v((int8x16_t)__a, 1); } +__ai int32x2_t vmovn_s64(int64x2_t __a) { + return (int32x2_t)__builtin_arm64_vmovn_v((int8x16_t)__a, 2); } +__ai uint8x8_t vmovn_u16(uint16x8_t __a) { + return (uint8x8_t)__builtin_arm64_vmovn_v((int8x16_t)__a, 16); } +__ai uint16x4_t vmovn_u32(uint32x4_t __a) { + return (uint16x4_t)__builtin_arm64_vmovn_v((int8x16_t)__a, 17); } +__ai uint32x2_t vmovn_u64(uint64x2_t __a) { + return (uint32x2_t)__builtin_arm64_vmovn_v((int8x16_t)__a, 18); } + /* vmovn_high_*: each passes __a (as int8x8_t) and __b (as int8x16_t) to __builtin_arm64_vmovn_high_v and casts the result to the declared return type; the trailing integer literal differs per variant (presumably a type selector -- confirm). */ +__ai int8x16_t vmovn_high_s16(int8x8_t __a, int16x8_t __b) { + return (int8x16_t)__builtin_arm64_vmovn_high_v(__a, (int8x16_t)__b, 32); } +__ai int16x8_t vmovn_high_s32(int16x4_t __a, int32x4_t __b) { + return 
(int16x8_t)__builtin_arm64_vmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vmovn_high_s64(int32x2_t __a, int64x2_t __b) { + return (int32x4_t)__builtin_arm64_vmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vmovn_high_u16(uint8x8_t __a, uint16x8_t __b) { + return (uint8x16_t)__builtin_arm64_vmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vmovn_high_u32(uint16x4_t __a, uint32x4_t __b) { + return (uint16x8_t)__builtin_arm64_vmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vmovn_high_u64(uint32x2_t __a, uint64x2_t __b) { + return (uint32x4_t)__builtin_arm64_vmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 50); } + /* vmov_n_* and vmovq_n_*: each returns a vector literal of the declared return type with the scalar __a written into every element. */ +__ai uint8x8_t vmov_n_u8(uint8_t __a) { + return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint16x4_t vmov_n_u16(uint16_t __a) { + return (uint16x4_t){ __a, __a, __a, __a }; } +__ai uint32x2_t vmov_n_u32(uint32_t __a) { + return (uint32x2_t){ __a, __a }; } +__ai int8x8_t vmov_n_s8(int8_t __a) { + return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int16x4_t vmov_n_s16(int16_t __a) { + return (int16x4_t){ __a, __a, __a, __a }; } +__ai int32x2_t vmov_n_s32(int32_t __a) { + return (int32x2_t){ __a, __a }; } +__ai poly8x8_t vmov_n_p8(poly8_t __a) { + return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly16x4_t vmov_n_p16(poly16_t __a) { + return (poly16x4_t){ __a, __a, __a, __a }; } +__ai float16x4_t vmov_n_f16(float16_t __a) { + return (float16x4_t){ __a, __a, __a, __a }; } +__ai float32x2_t vmov_n_f32(float32_t __a) { + return (float32x2_t){ __a, __a }; } +__ai uint8x16_t vmovq_n_u8(uint8_t __a) { + return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint16x8_t vmovq_n_u16(uint16_t __a) { + return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint32x4_t vmovq_n_u32(uint32_t __a) { + return (uint32x4_t){ __a, __a, __a, __a }; } +__ai int8x16_t 
vmovq_n_s8(int8_t __a) { + return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int16x8_t vmovq_n_s16(int16_t __a) { + return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int32x4_t vmovq_n_s32(int32_t __a) { + return (int32x4_t){ __a, __a, __a, __a }; } +__ai float16x8_t vmovq_n_f16(float16_t __a) { + return (float16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly8x16_t vmovq_n_p8(poly8_t __a) { + return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly16x8_t vmovq_n_p16(poly16_t __a) { + return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai float32x4_t vmovq_n_f32(float32_t __a) { + return (float32x4_t){ __a, __a, __a, __a }; } +__ai float64x2_t vmovq_n_f64(float64_t __a) { + return (float64x2_t){ __a, __a }; } +__ai int64x1_t vmov_n_s64(int64_t __a) { + return (int64x1_t){ __a }; } +__ai uint64x1_t vmov_n_u64(uint64_t __a) { + return (uint64x1_t){ __a }; } +__ai int64x2_t vmovq_n_s64(int64_t __a) { + return (int64x2_t){ __a, __a }; } +__ai uint64x2_t vmovq_n_u64(uint64_t __a) { + return (uint64x2_t){ __a, __a }; } + /* vmul_* and vmulq_*: each returns __a * __b using the vector multiply operator on two operands of the same vector type. */ +__ai int8x8_t vmul_s8(int8x8_t __a, int8x8_t __b) { + return __a * __b; } +__ai int16x4_t vmul_s16(int16x4_t __a, int16x4_t __b) { + return __a * __b; } +__ai int32x2_t vmul_s32(int32x2_t __a, int32x2_t __b) { + return __a * __b; } +__ai float32x2_t vmul_f32(float32x2_t __a, float32x2_t __b) { + return __a * __b; } +__ai uint8x8_t vmul_u8(uint8x8_t __a, uint8x8_t __b) { + return __a * __b; } +__ai uint16x4_t vmul_u16(uint16x4_t __a, uint16x4_t __b) { + return __a * __b; } +__ai uint32x2_t vmul_u32(uint32x2_t __a, uint32x2_t __b) { + return __a * __b; } +__ai int8x16_t vmulq_s8(int8x16_t __a, int8x16_t __b) { + return __a * __b; } +__ai int16x8_t vmulq_s16(int16x8_t __a, int16x8_t __b) { + return __a * __b; } +__ai int32x4_t vmulq_s32(int32x4_t __a, int32x4_t __b) { + return __a * __b; } 
+__ai float32x4_t vmulq_f32(float32x4_t __a, float32x4_t __b) { + return __a * __b; } +__ai float64x2_t vmulq_f64(float64x2_t __a, float64x2_t __b) { + return __a * __b; } +__ai uint8x16_t vmulq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a * __b; } +__ai uint16x8_t vmulq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a * __b; } +__ai uint32x4_t vmulq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a * __b; } + /* vmull_high_lane_*: statement-expression macros. Each copies a and b into typed locals, takes vget_high_*(__a), shuffles __b with every index equal to __c, and passes both to vmull_*. __c is a shuffle index, so it must be a constant expression. */ +#define vmull_high_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + vmull_s16(vget_high_s16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_high_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + vmull_s32(vget_high_s32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmull_high_lane_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x4_t __b = (b); \ + vmull_u16(vget_high_u16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_high_lane_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x2_t __b = (b); \ + vmull_u32(vget_high_u32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) + /* vmull_high_laneq_*: statement-expression macros yielding vmull_*(vget_high_*(__a), shuffle of __b with every index equal to __c); __b is declared with the same vector type as __a. */ +#define vmull_high_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + vmull_s16(vget_high_s16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_high_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + vmull_s32(vget_high_s32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmull_high_laneq_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + vmull_u16(vget_high_u16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_high_laneq_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + vmull_u32(vget_high_u32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) + +__ai int32x4_t 
vmull_high_n_s16(int16x8_t __a, int16_t __b) { + return vmull_s16(vget_high_s16(__a), (int16x4_t){ __b, __b, __b, __b }); } +__ai int64x2_t vmull_high_n_s32(int32x4_t __a, int32_t __b) { + return vmull_s32(vget_high_s32(__a), (int32x2_t){ __b, __b }); } +__ai uint32x4_t vmull_high_n_u16(uint16x8_t __a, uint16_t __b) { + return vmull_u16(vget_high_u16(__a), (uint16x4_t){ __b, __b, __b, __b }); } +__ai uint64x2_t vmull_high_n_u32(uint32x4_t __a, uint32_t __b) { + return vmull_u32(vget_high_u32(__a), (uint32x2_t){ __b, __b }); } + /* vmull_lane_*: statement-expression macros yielding vmull_*(__a, shuffle of __b with every index equal to __c); a and b are each copied into a typed local first. */ +#define vmull_lane_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + vmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmull_lane_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_lane_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + /* vmull_laneq_*: statement-expression macros yielding vmull_*(__a, shuffle of __b with every index equal to __c); __b is declared as int16x8_t / int32x4_t / uint16x8_t / uint32x4_t and the shuffle result has as many elements as __a. */ +#define vmull_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + vmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmull_laneq_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x8_t __b = (b); \ + vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_laneq_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x4_t __b = (b); \ + vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + /* vmull_n_*: each calls __builtin_arm64_vmull_v directly with __a and a vector literal holding the scalar __b in every element, both cast to int8x8_t, then casts the result to the declared return type. */ +__ai int32x4_t vmull_n_s16(int16x4_t __a, int16_t __b) { + return 
(int32x4_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); } +__ai int64x2_t vmull_n_s32(int32x2_t __a, int32_t __b) { + return (int64x2_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); } +__ai uint32x4_t vmull_n_u16(uint16x4_t __a, uint16_t __b) { + return (uint32x4_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)(uint16x4_t){ __b, __b, __b, __b }, 50); } +__ai uint64x2_t vmull_n_u32(uint32x2_t __a, uint32_t __b) { + return (uint64x2_t)__builtin_arm64_vmull_v((int8x8_t)__a, (int8x8_t)(uint32x2_t){ __b, __b }, 51); } + /* vmul_p8 / vmulq_p8: the poly8 multiplies go through __builtin_arm64_vmul_v / __builtin_arm64_vmulq_v with operands cast to int8x8_t / int8x16_t, instead of the plain vector multiply operator. */ +__ai poly8x8_t vmul_p8(poly8x8_t __a, poly8x8_t __b) { + return (poly8x8_t)__builtin_arm64_vmul_v((int8x8_t)__a, (int8x8_t)__b, 4); } +__ai poly8x16_t vmulq_p8(poly8x16_t __a, poly8x16_t __b) { + return (poly8x16_t)__builtin_arm64_vmulq_v((int8x16_t)__a, (int8x16_t)__b, 36); } + /* vmulx_* / vmulxq_*: vector forms; each casts both operands to int8x8_t or int8x16_t, calls __builtin_arm64_vmulx_v / __builtin_arm64_vmulxq_v, and casts the result back to the float vector type. */ +__ai float32x2_t vmulx_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vmulx_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vmulxq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vmulxq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vmulxq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vmulxq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + /* vmulxd_f64: scalar float64_t form; forwards both operands to __builtin_arm64_vmulxd_f64. */ +__ai float64_t vmulxd_f64(float64_t __a, float64_t __b) { + return (float64_t)__builtin_arm64_vmulxd_f64(__a, __b); } + /* vmulxs_f32: scalar float32_t form; forwards both operands to __builtin_arm64_vmulxs_f32. */ +__ai float32_t vmulxs_f32(float32_t __a, float32_t __b) { + return (float32_t)__builtin_arm64_vmulxs_f32(__a, __b); } + /* vmulx_lane_* / vmulxq_lane_*: statement-expression macros that pass __a, __b and the index __c straight to the *_lane_v builtin; unlike the other lane macros here, no __builtin_shufflevector is used. Note __b has the same vector type as __a in all three. */ +#define vmulx_lane_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); \ + (float32x2_t)__builtin_arm64_vmulx_lane_v((int8x8_t)__a, (int8x8_t)__b, __c, 7); }) +#define vmulxq_lane_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); \ + (float32x4_t)__builtin_arm64_vmulxq_lane_v((int8x16_t)__a, (int8x16_t)__b, __c, 39); }) +#define vmulxq_lane_f64(a, b, __c) 
__extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); \ + (float64x2_t)__builtin_arm64_vmulxq_lane_v((int8x16_t)__a, (int8x16_t)__b, __c, 40); }) + /* vmul_lane_* and vmulq_lane_*: statement-expression macros yielding __a * (shuffle of __b with every index equal to __c); the shuffle produces as many elements as __a has. a and b are each evaluated once into typed locals; __c must be a constant expression because it is a shuffle index. */ +#define vmul_lane_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmul_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmul_lane_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmul_lane_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmul_lane_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmulq_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) +#define vmulq_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_lane_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_lane_f64(a, b, __c) __extension__ ({ \ + float64x2_t __a = (a); float64x1_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmulq_lane_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) +#define vmulq_lane_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); 
}) + /* vmul_laneq_* and vmulq_laneq_*: statement-expression macros yielding __a * (shuffle of __b with every index equal to __c); __b is declared as int16x8_t / int32x4_t / float32x4_t / float64x2_t / uint16x8_t / uint32x4_t and the shuffle result has as many elements as __a. */ +#define vmul_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmul_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmul_laneq_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmul_laneq_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x8_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmul_laneq_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmulq_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) +#define vmulq_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_laneq_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_laneq_f64(a, b, __c) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmulq_laneq_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) +#define vmulq_laneq_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) + /* vmul_n_* and vmulq_n_*: each returns __a * v, where v is a vector literal of __a's type holding the scalar __b in every element. */ +__ai int16x4_t vmul_n_s16(int16x4_t __a, int16_t __b) { + return __a * (int16x4_t){ __b, __b, __b, __b }; } +__ai int32x2_t 
vmul_n_s32(int32x2_t __a, int32_t __b) { + return __a * (int32x2_t){ __b, __b }; } +__ai float32x2_t vmul_n_f32(float32x2_t __a, float32_t __b) { + return __a * (float32x2_t){ __b, __b }; } +__ai uint16x4_t vmul_n_u16(uint16x4_t __a, uint16_t __b) { + return __a * (uint16x4_t){ __b, __b, __b, __b }; } +__ai uint32x2_t vmul_n_u32(uint32x2_t __a, uint32_t __b) { + return __a * (uint32x2_t){ __b, __b }; } +__ai int16x8_t vmulq_n_s16(int16x8_t __a, int16_t __b) { + return __a * (int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; } +__ai int32x4_t vmulq_n_s32(int32x4_t __a, int32_t __b) { + return __a * (int32x4_t){ __b, __b, __b, __b }; } +__ai float32x4_t vmulq_n_f32(float32x4_t __a, float32_t __b) { + return __a * (float32x4_t){ __b, __b, __b, __b }; } +__ai float64x2_t vmulq_n_f64(float64x2_t __a, float64_t __b) { + return __a * (float64x2_t){ __b, __b }; } +__ai uint16x8_t vmulq_n_u16(uint16x8_t __a, uint16_t __b) { + return __a * (uint16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; } +__ai uint32x4_t vmulq_n_u32(uint32x4_t __a, uint32_t __b) { + return __a * (uint32x4_t){ __b, __b, __b, __b }; } + /* vmvn_* and vmvnq_*: bitwise NOT; each returns ~__a. Only 8-, 16- and 32-bit integer and poly8 variants are defined in this group. */ +__ai int8x8_t vmvn_s8(int8x8_t __a) { + return ~__a; } +__ai int16x4_t vmvn_s16(int16x4_t __a) { + return ~__a; } +__ai int32x2_t vmvn_s32(int32x2_t __a) { + return ~__a; } +__ai uint8x8_t vmvn_u8(uint8x8_t __a) { + return ~__a; } +__ai uint16x4_t vmvn_u16(uint16x4_t __a) { + return ~__a; } +__ai uint32x2_t vmvn_u32(uint32x2_t __a) { + return ~__a; } +__ai poly8x8_t vmvn_p8(poly8x8_t __a) { + return ~__a; } +__ai int8x16_t vmvnq_s8(int8x16_t __a) { + return ~__a; } +__ai int16x8_t vmvnq_s16(int16x8_t __a) { + return ~__a; } +__ai int32x4_t vmvnq_s32(int32x4_t __a) { + return ~__a; } +__ai uint8x16_t vmvnq_u8(uint8x16_t __a) { + return ~__a; } +__ai uint16x8_t vmvnq_u16(uint16x8_t __a) { + return ~__a; } +__ai uint32x4_t vmvnq_u32(uint32x4_t __a) { + return ~__a; } +__ai poly8x16_t vmvnq_p8(poly8x16_t __a) { + return ~__a; } + /* vneg_* and vnegq_*: negation; each returns -__a. Defined for signed-integer and float vector types only. */ +__ai int8x8_t vneg_s8(int8x8_t __a) { 
+ return -__a; } +__ai int16x4_t vneg_s16(int16x4_t __a) { + return -__a; } +__ai int32x2_t vneg_s32(int32x2_t __a) { + return -__a; } +__ai float32x2_t vneg_f32(float32x2_t __a) { + return -__a; } +__ai int8x16_t vnegq_s8(int8x16_t __a) { + return -__a; } +__ai int16x8_t vnegq_s16(int16x8_t __a) { + return -__a; } +__ai int32x4_t vnegq_s32(int32x4_t __a) { + return -__a; } +__ai int64x2_t vnegq_s64(int64x2_t __a) { + return -__a; } +__ai float32x4_t vnegq_f32(float32x4_t __a) { + return -__a; } +__ai float64x2_t vnegq_f64(float64x2_t __a) { + return -__a; } + /* vorn_* and vornq_*: OR-NOT; each returns __a | ~__b (the second operand is complemented before the OR). */ +__ai int8x8_t vorn_s8(int8x8_t __a, int8x8_t __b) { + return __a | ~__b; } +__ai int16x4_t vorn_s16(int16x4_t __a, int16x4_t __b) { + return __a | ~__b; } +__ai int32x2_t vorn_s32(int32x2_t __a, int32x2_t __b) { + return __a | ~__b; } +__ai int64x1_t vorn_s64(int64x1_t __a, int64x1_t __b) { + return __a | ~__b; } +__ai uint8x8_t vorn_u8(uint8x8_t __a, uint8x8_t __b) { + return __a | ~__b; } +__ai uint16x4_t vorn_u16(uint16x4_t __a, uint16x4_t __b) { + return __a | ~__b; } +__ai uint32x2_t vorn_u32(uint32x2_t __a, uint32x2_t __b) { + return __a | ~__b; } +__ai uint64x1_t vorn_u64(uint64x1_t __a, uint64x1_t __b) { + return __a | ~__b; } +__ai int8x16_t vornq_s8(int8x16_t __a, int8x16_t __b) { + return __a | ~__b; } +__ai int16x8_t vornq_s16(int16x8_t __a, int16x8_t __b) { + return __a | ~__b; } +__ai int32x4_t vornq_s32(int32x4_t __a, int32x4_t __b) { + return __a | ~__b; } +__ai int64x2_t vornq_s64(int64x2_t __a, int64x2_t __b) { + return __a | ~__b; } +__ai uint8x16_t vornq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a | ~__b; } +__ai uint16x8_t vornq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a | ~__b; } +__ai uint32x4_t vornq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a | ~__b; } +__ai uint64x2_t vornq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a | ~__b; } + /* vorr_* and vorrq_*: bitwise OR; each returns __a | __b. */ +__ai int8x8_t vorr_s8(int8x8_t __a, int8x8_t __b) { + return __a | __b; } +__ai int16x4_t vorr_s16(int16x4_t __a, int16x4_t 
__b) { + return __a | __b; } +__ai int32x2_t vorr_s32(int32x2_t __a, int32x2_t __b) { + return __a | __b; } +__ai int64x1_t vorr_s64(int64x1_t __a, int64x1_t __b) { + return __a | __b; } +__ai uint8x8_t vorr_u8(uint8x8_t __a, uint8x8_t __b) { + return __a | __b; } +__ai uint16x4_t vorr_u16(uint16x4_t __a, uint16x4_t __b) { + return __a | __b; } +__ai uint32x2_t vorr_u32(uint32x2_t __a, uint32x2_t __b) { + return __a | __b; } +__ai uint64x1_t vorr_u64(uint64x1_t __a, uint64x1_t __b) { + return __a | __b; } +__ai int8x16_t vorrq_s8(int8x16_t __a, int8x16_t __b) { + return __a | __b; } +__ai int16x8_t vorrq_s16(int16x8_t __a, int16x8_t __b) { + return __a | __b; } +__ai int32x4_t vorrq_s32(int32x4_t __a, int32x4_t __b) { + return __a | __b; } +__ai int64x2_t vorrq_s64(int64x2_t __a, int64x2_t __b) { + return __a | __b; } +__ai uint8x16_t vorrq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a | __b; } +__ai uint16x8_t vorrq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a | __b; } +__ai uint32x4_t vorrq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a | __b; } +__ai uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a | __b; } + /* vpadal_* and vpadalq_*: each passes the accumulator __a and the source __b (cast to int8x8_t / int8x16_t unless already that type) to __builtin_arm64_vpadal_v / __builtin_arm64_vpadalq_v and casts the result back to __a's type. The trailing integer literal differs per variant -- presumably a type selector for the builtin; confirm against the compiler's NEON builtin definitions. */ +__ai int16x4_t vpadal_s8(int16x4_t __a, int8x8_t __b) { + return (int16x4_t)__builtin_arm64_vpadal_v((int8x8_t)__a, __b, 1); } +__ai int32x2_t vpadal_s16(int32x2_t __a, int16x4_t __b) { + return (int32x2_t)__builtin_arm64_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vpadal_s32(int64x1_t __a, int32x2_t __b) { + return (int64x1_t)__builtin_arm64_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint16x4_t vpadal_u8(uint16x4_t __a, uint8x8_t __b) { + return (uint16x4_t)__builtin_arm64_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vpadal_u16(uint32x2_t __a, uint16x4_t __b) { + return (uint32x2_t)__builtin_arm64_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vpadal_u32(uint64x1_t __a, uint32x2_t __b) { + return (uint64x1_t)__builtin_arm64_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 
19); } +__ai int16x8_t vpadalq_s8(int16x8_t __a, int8x16_t __b) { + return (int16x8_t)__builtin_arm64_vpadalq_v((int8x16_t)__a, __b, 33); } +__ai int32x4_t vpadalq_s16(int32x4_t __a, int16x8_t __b) { + return (int32x4_t)__builtin_arm64_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vpadalq_s32(int64x2_t __a, int32x4_t __b) { + return (int64x2_t)__builtin_arm64_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint16x8_t vpadalq_u8(uint16x8_t __a, uint8x16_t __b) { + return (uint16x8_t)__builtin_arm64_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vpadalq_u16(uint32x4_t __a, uint16x8_t __b) { + return (uint32x4_t)__builtin_arm64_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vpadalq_u32(uint64x2_t __a, uint32x4_t __b) { + return (uint64x2_t)__builtin_arm64_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + /* vpadd_* and vpaddq_*: each passes both operands (cast to int8x8_t / int8x16_t unless already that type) to __builtin_arm64_vpadd_v / __builtin_arm64_vpaddq_v and casts the result back to the operand type; the trailing integer literal differs per variant (presumably a type selector -- confirm). */ +__ai int8x8_t vpadd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vpadd_v(__a, __b, 0); } +__ai int16x4_t vpadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vpadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vpadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vpadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vpadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vpadd_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vpaddq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vpaddq_v(__a, __b, 32); } +__ai int16x8_t vpaddq_s16(int16x8_t __a, 
int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vpaddq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vpaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vpaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vpaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai int64x2_t vpaddq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint64x2_t vpaddq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai float32x4_t vpaddq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpaddq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int64_t vpaddd_s64(int64x2_t __a) { + return (int64_t)__builtin_arm64_vpaddd_s64(__a); } +__ai float64_t vpaddd_f64(float64x2_t __a) { + return (float64_t)__builtin_arm64_vpaddd_f64(__a); } + +__ai int16x4_t vpaddl_s8(int8x8_t __a) { + return (int16x4_t)__builtin_arm64_vpaddl_v(__a, 1); } +__ai int32x2_t vpaddl_s16(int16x4_t __a) { + return (int32x2_t)__builtin_arm64_vpaddl_v((int8x8_t)__a, 2); } +__ai int64x1_t vpaddl_s32(int32x2_t __a) { + return (int64x1_t)__builtin_arm64_vpaddl_v((int8x8_t)__a, 3); } +__ai uint16x4_t vpaddl_u8(uint8x8_t __a) { + return (uint16x4_t)__builtin_arm64_vpaddl_v((int8x8_t)__a, 17); } +__ai uint32x2_t vpaddl_u16(uint16x4_t __a) { + return 
(uint32x2_t)__builtin_arm64_vpaddl_v((int8x8_t)__a, 18); } +__ai uint64x1_t vpaddl_u32(uint32x2_t __a) { + return (uint64x1_t)__builtin_arm64_vpaddl_v((int8x8_t)__a, 19); } +__ai int16x8_t vpaddlq_s8(int8x16_t __a) { + return (int16x8_t)__builtin_arm64_vpaddlq_v(__a, 33); } +__ai int32x4_t vpaddlq_s16(int16x8_t __a) { + return (int32x4_t)__builtin_arm64_vpaddlq_v((int8x16_t)__a, 34); } +__ai int64x2_t vpaddlq_s32(int32x4_t __a) { + return (int64x2_t)__builtin_arm64_vpaddlq_v((int8x16_t)__a, 35); } +__ai uint16x8_t vpaddlq_u8(uint8x16_t __a) { + return (uint16x8_t)__builtin_arm64_vpaddlq_v((int8x16_t)__a, 49); } +__ai uint32x4_t vpaddlq_u16(uint16x8_t __a) { + return (uint32x4_t)__builtin_arm64_vpaddlq_v((int8x16_t)__a, 50); } +__ai uint64x2_t vpaddlq_u32(uint32x4_t __a) { + return (uint64x2_t)__builtin_arm64_vpaddlq_v((int8x16_t)__a, 51); } + +__ai float32_t vpadds_f32(float32x2_t __a) { + return (float32_t)__builtin_arm64_vpadds_f32(__a); } + +__ai int8x8_t vpmax_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vpmax_v(__a, __b, 0); } +__ai int16x4_t vpmax_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vpmax_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vpmax_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vpmax_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vpmax_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vpmax_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vpmaxq_s8(int8x16_t __a, int8x16_t __b) { + return 
(int8x16_t)__builtin_arm64_vpmaxq_v(__a, __b, 32); } +__ai int16x8_t vpmaxq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vpmaxq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vpmaxq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vpmaxq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vpmaxq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vpmaxq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpmaxq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x8_t vpmin_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vpmin_v(__a, __b, 0); } +__ai int16x4_t vpmin_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vpmin_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vpmin_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vpmin_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vpmin_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vpmin_f32(float32x2_t __a, float32x2_t __b) { + return 
(float32x2_t)__builtin_arm64_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vpminq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vpminq_v(__a, __b, 32); } +__ai int16x8_t vpminq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vpminq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vpminq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vpminq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vpminq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vpminq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpminq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x8_t vqabs_s8(int8x8_t __a) { + return (int8x8_t)__builtin_arm64_vqabs_v(__a, 0); } +__ai int16x4_t vqabs_s16(int16x4_t __a) { + return (int16x4_t)__builtin_arm64_vqabs_v((int8x8_t)__a, 1); } +__ai int32x2_t vqabs_s32(int32x2_t __a) { + return (int32x2_t)__builtin_arm64_vqabs_v((int8x8_t)__a, 2); } +__ai int8x16_t vqabsq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_arm64_vqabsq_v(__a, 32); } +__ai int16x8_t vqabsq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_arm64_vqabsq_v((int8x16_t)__a, 33); } +__ai int32x4_t vqabsq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_arm64_vqabsq_v((int8x16_t)__a, 34); } +__ai int64x2_t vqabsq_s64(int64x2_t __a) { + return (int64x2_t)__builtin_arm64_vqabsq_v((int8x16_t)__a, 35); } + +__ai int8_t 
vqabsb_s8(int8_t __a) { + return (int8_t)__builtin_arm64_vqabsb_s8(__a); } + +__ai int64_t vqabsd_s64(int64_t __a) { + return (int64_t)__builtin_arm64_vqabsd_s64(__a); } + +__ai int16_t vqabsh_s16(int16_t __a) { + return (int16_t)__builtin_arm64_vqabsh_s16(__a); } + +__ai int32_t vqabss_s32(int32_t __a) { + return (int32_t)__builtin_arm64_vqabss_s32(__a); } + +__ai int8x8_t vqadd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vqadd_v(__a, __b, 0); } +__ai int16x4_t vqadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vqadd_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_arm64_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vqadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vqadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vqadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vqadd_u64(uint64x1_t __a, uint64x1_t __b) { + return (uint64x1_t)__builtin_arm64_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int8x16_t vqaddq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vqaddq_v(__a, __b, 32); } +__ai int16x8_t vqaddq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqaddq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vqaddq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 
35); } +__ai uint8x16_t vqaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vqaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vqaddq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int8_t vqaddb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_arm64_vqaddb_s8(__a, __b); } +__ai uint8_t vqaddb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_arm64_vqaddb_u8(__a, __b); } + +__ai int64_t vqaddd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vqaddd_s64(__a, __b); } +__ai uint64_t vqaddd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vqaddd_u64(__a, __b); } + +__ai int16_t vqaddh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_arm64_vqaddh_s16(__a, __b); } +__ai uint16_t vqaddh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_arm64_vqaddh_u16(__a, __b); } + +__ai int32_t vqadds_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_arm64_vqadds_s32(__a, __b); } +__ai uint32_t vqadds_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_arm64_vqadds_u32(__a, __b); } + +__ai int32x4_t vqdmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return vqaddq_s32(__a, vqdmull_s16(__b, __c)); } +__ai int64x2_t vqdmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return vqaddq_s64(__a, vqdmull_s32(__b, __c)); } + +__ai int32_t vqdmlalh_s16(int32_t __a, int16_t __b, int16_t __c) { + return vqadds_s32(__a, vqdmullh_s16(__b, __c)); } + +#define vqdmlalh_lane_s16(a, b, c, __d) __extension__ ({ \ + int32_t __a = (a); int16_t __b = (b); int16x8_t __c = (c); \ + 
vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +__ai int64_t vqdmlals_s32(int64_t __a, int32_t __b, int32_t __c) { + return vqaddd_s64(__a, vqdmulls_s32(__b, __c)); } + +#define vqdmlals_lane_s32(a, b, c, __d) __extension__ ({ \ + int64_t __a = (a); int32_t __b = (b); int32x4_t __c = (c); \ + vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vqdmlal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return (int32x4_t)__builtin_arm64_vqdmlal_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } +__ai int64x2_t vqdmlal_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return (int64x2_t)__builtin_arm64_vqdmlal_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 35); } + +#define vqdmlal_high_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + vqdmlal_high_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vqdmlal_high_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + vqdmlal_high_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +#define vqdmlal_high_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + vqdmlal_high_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vqdmlal_high_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + vqdmlal_high_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +__ai int32x4_t vqdmlal_high_n_s16(int32x4_t __a, int16x8_t __b, int16_t __c) { + return vqdmlal_s16(__a, vget_high_s16(__b), (int16x4_t){ __c, __c, __c, __c }); } +__ai int64x2_t vqdmlal_high_n_s32(int64x2_t __a, int32x4_t __b, int32_t __c) { + return vqdmlal_s32(__a, vget_high_s32(__b), 
(int32x2_t){ __c, __c }); } + +#define vqdmlal_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlal_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vqdmlal_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlal_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vqdmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { + return vqaddq_s32(__a, vqdmull_s16(__b, (int16x4_t){ __c, __c, __c, __c })); } +__ai int64x2_t vqdmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { + return vqaddq_s64(__a, vqdmull_s32(__b, (int32x2_t){ __c, __c })); } + +__ai int32x4_t vqdmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return vqsubq_s32(__a, vqdmull_s16(__b, __c)); } +__ai int64x2_t vqdmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return vqsubq_s64(__a, vqdmull_s32(__b, __c)); } + +__ai int32_t vqdmlslh_s16(int32_t __a, int16_t __b, int16_t __c) { + return vqsubs_s32(__a, vqdmullh_s16(__b, __c)); } + +#define vqdmlslh_lane_s16(a, b, c, __d) __extension__ ({ \ + int32_t __a = (a); int16_t __b = (b); int16x8_t __c = (c); \ + vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +__ai int64_t vqdmlsls_s32(int64_t __a, int32_t __b, int32_t __c) { + return vqsubd_s64(__a, vqdmulls_s32(__b, __c)); } + +#define vqdmlsls_lane_s32(a, b, c, __d) __extension__ ({ \ + int64_t __a = (a); int32_t __b = (b); int32x4_t __c = (c); \ + 
vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vqdmlsl_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return (int32x4_t)__builtin_arm64_vqdmlsl_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } +__ai int64x2_t vqdmlsl_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return (int64x2_t)__builtin_arm64_vqdmlsl_high_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 35); } + +#define vqdmlsl_high_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + vqdmlsl_high_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vqdmlsl_high_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + vqdmlsl_high_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +#define vqdmlsl_high_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + vqdmlsl_high_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vqdmlsl_high_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + vqdmlsl_high_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +__ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __a, int16x8_t __b, int16_t __c) { + return vqdmlsl_s16(__a, vget_high_s16(__b), (int16x4_t){ __c, __c, __c, __c }); } +__ai int64x2_t vqdmlsl_high_n_s32(int64x2_t __a, int32x4_t __b, int32_t __c) { + return vqdmlsl_s32(__a, vget_high_s32(__b), (int32x2_t){ __c, __c }); } + +#define vqdmlsl_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlsl_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = 
(b); int32x2_t __c = (c); \ + vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vqdmlsl_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlsl_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vqdmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { + return vqsubq_s32(__a, vqdmull_s16(__b, (int16x4_t){ __c, __c, __c, __c })); } +__ai int64x2_t vqdmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { + return vqsubq_s64(__a, vqdmull_s32(__b, (int32x2_t){ __c, __c })); } + +__ai int16x4_t vqdmulh_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqdmulh_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int16x8_t vqdmulhq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqdmulhq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); } + +#define vqdmulh_lane_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmulh_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vqdmulhq_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) +#define vqdmulhq_lane_s32(a, b, __c) 
__extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) + +#define vqdmulh_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmulh_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vqdmulhq_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) +#define vqdmulhq_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) + +__ai int16x4_t vqdmulh_n_s16(int16x4_t __a, int16_t __b) { + return (int16x4_t)__builtin_arm64_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); } +__ai int32x2_t vqdmulh_n_s32(int32x2_t __a, int32_t __b) { + return (int32x2_t)__builtin_arm64_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); } +__ai int16x8_t vqdmulhq_n_s16(int16x8_t __a, int16_t __b) { + return (int16x8_t)__builtin_arm64_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); } +__ai int32x4_t vqdmulhq_n_s32(int32x4_t __a, int32_t __b) { + return (int32x4_t)__builtin_arm64_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); } + +#define vqdmullh_lane_s16(a, b, __c) __extension__ ({ \ + int16_t __a = (a); int16x8_t __b = (b); \ + (int32_t)__builtin_arm64_vqdmullh_lane_s16(__a, __b, __c); }) + +#define vqdmulls_lane_s32(a, b, __c) __extension__ ({ \ + int32_t __a = (a); int32x4_t __b = (b); \ + (int64_t)__builtin_arm64_vqdmulls_lane_s32(__a, __b, __c); }) + +__ai int32x4_t vqdmull_high_s16(int16x8_t __a, int16x8_t __b) { + 
return (int32x4_t)__builtin_arm64_vqdmull_high_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vqdmull_high_s32(int32x4_t __a, int32x4_t __b) { + return (int64x2_t)__builtin_arm64_vqdmull_high_v((int8x16_t)__a, (int8x16_t)__b, 35); } + +#define vqdmull_high_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + vqdmull_s16(vget_high_s16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmull_high_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + vqdmull_s32(vget_high_s32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vqdmull_high_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + vqdmull_s16(vget_high_s16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmull_high_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + vqdmull_s32(vget_high_s32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) + +__ai int32x4_t vqdmull_high_n_s16(int16x8_t __a, int16_t __b) { + return vqdmull_s16(vget_high_s16(__a), (int16x4_t){ __b, __b, __b, __b }); } +__ai int64x2_t vqdmull_high_n_s32(int32x4_t __a, int32_t __b) { + return vqdmull_s32(vget_high_s32(__a), (int32x2_t){ __b, __b }); } + +#define vqdmull_lane_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmull_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vqdmull_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmull_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + vqdmull_s32(__a, __builtin_shufflevector(__b, __b, 
__c, __c)); }) + +__ai int32x4_t vqdmull_n_s16(int16x4_t __a, int16_t __b) { + return (int32x4_t)__builtin_arm64_vqdmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); } +__ai int64x2_t vqdmull_n_s32(int32x2_t __a, int32_t __b) { + return (int64x2_t)__builtin_arm64_vqdmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); } + +__ai int8x8_t vqmovn_s16(int16x8_t __a) { + return (int8x8_t)__builtin_arm64_vqmovn_v((int8x16_t)__a, 0); } +__ai int16x4_t vqmovn_s32(int32x4_t __a) { + return (int16x4_t)__builtin_arm64_vqmovn_v((int8x16_t)__a, 1); } +__ai int32x2_t vqmovn_s64(int64x2_t __a) { + return (int32x2_t)__builtin_arm64_vqmovn_v((int8x16_t)__a, 2); } +__ai uint8x8_t vqmovn_u16(uint16x8_t __a) { + return (uint8x8_t)__builtin_arm64_vqmovn_v((int8x16_t)__a, 16); } +__ai uint16x4_t vqmovn_u32(uint32x4_t __a) { + return (uint16x4_t)__builtin_arm64_vqmovn_v((int8x16_t)__a, 17); } +__ai uint32x2_t vqmovn_u64(uint64x2_t __a) { + return (uint32x2_t)__builtin_arm64_vqmovn_v((int8x16_t)__a, 18); } + +__ai int32_t vqmovnd_s64(int64_t __a) { + return (int32_t)__builtin_arm64_vqmovnd_s64(__a); } +__ai uint32_t vqmovnd_u64(uint64_t __a) { + return (uint32_t)__builtin_arm64_vqmovnd_u64(__a); } + +__ai int8_t vqmovnh_s16(int16_t __a) { + return (int8_t)__builtin_arm64_vqmovnh_s16(__a); } +__ai uint8_t vqmovnh_u16(uint16_t __a) { + return (uint8_t)__builtin_arm64_vqmovnh_u16(__a); } + +__ai int16_t vqmovns_s32(int32_t __a) { + return (int16_t)__builtin_arm64_vqmovns_s32(__a); } +__ai uint16_t vqmovns_u32(uint32_t __a) { + return (uint16_t)__builtin_arm64_vqmovns_u32(__a); } + +__ai int8x16_t vqmovn_high_s16(int8x8_t __a, int16x8_t __b) { + return (int8x16_t)__builtin_arm64_vqmovn_high_v(__a, (int8x16_t)__b, 32); } +__ai int16x8_t vqmovn_high_s32(int16x4_t __a, int32x4_t __b) { + return (int16x8_t)__builtin_arm64_vqmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqmovn_high_s64(int32x2_t __a, int64x2_t __b) { + return 
(int32x4_t)__builtin_arm64_vqmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 34); } /* vqmovn_high_u16/_u32/_u64 below: bitcast __a to int8x8_t and __b to int8x16_t, forward to __builtin_arm64_vqmovn_high_v with constants 48/49/50, and cast the result to the declared unsigned return type. */ +__ai uint8x16_t vqmovn_high_u16(uint8x8_t __a, uint16x8_t __b) { + return (uint8x16_t)__builtin_arm64_vqmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vqmovn_high_u32(uint16x4_t __a, uint32x4_t __b) { + return (uint16x8_t)__builtin_arm64_vqmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqmovn_high_u64(uint32x2_t __a, uint64x2_t __b) { + return (uint32x4_t)__builtin_arm64_vqmovn_high_v((int8x8_t)__a, (int8x16_t)__b, 50); } + /* vqmovun_s16/_s32/_s64: take a signed vector, return an unsigned vector with narrower elements; the operand is bitcast to int8x16_t and passed to __builtin_arm64_vqmovun_v with constants 16/17/18. */ +__ai uint8x8_t vqmovun_s16(int16x8_t __a) { + return (uint8x8_t)__builtin_arm64_vqmovun_v((int8x16_t)__a, 16); } +__ai uint16x4_t vqmovun_s32(int32x4_t __a) { + return (uint16x4_t)__builtin_arm64_vqmovun_v((int8x16_t)__a, 17); } +__ai uint32x2_t vqmovun_s64(int64x2_t __a) { + return (uint32x2_t)__builtin_arm64_vqmovun_v((int8x16_t)__a, 18); } + /* Scalar forms: each forwards its argument unchanged to a dedicated builtin and casts the result. NOTE(review): the results are declared signed (int32_t / int8_t / int16_t) although the vector vqmovun_* wrappers above return unsigned vectors; ACLE appears to declare these scalar results unsigned - confirm against the toolchain's builtin definitions before changing anything. */ +__ai int32_t vqmovund_s64(int64_t __a) { + return (int32_t)__builtin_arm64_vqmovund_s64(__a); } + +__ai int8_t vqmovunh_s16(int16_t __a) { + return (int8_t)__builtin_arm64_vqmovunh_s16(__a); } + +__ai int16_t vqmovuns_s32(int32_t __a) { + return (int16_t)__builtin_arm64_vqmovuns_s32(__a); } + /* vqmovun_high_*: forward (__a as int8x8_t, __b as int8x16_t) to __builtin_arm64_vqmovun_high_v with constants 32/33/34. NOTE(review): the first parameter and the result are signed vectors here, whereas vqmovun_s16/_s32/_s64 return unsigned vectors, so feeding their result in as __a presumably relies on an implicit or explicit vector cast - confirm whether this mirrors the compiler's builtin table. */ +__ai int8x16_t vqmovun_high_s16(int8x8_t __a, int16x8_t __b) { + return (int8x16_t)__builtin_arm64_vqmovun_high_v(__a, (int8x16_t)__b, 32); } +__ai int16x8_t vqmovun_high_s32(int16x4_t __a, int32x4_t __b) { + return (int16x8_t)__builtin_arm64_vqmovun_high_v((int8x8_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqmovun_high_s64(int32x2_t __a, int64x2_t __b) { + return (int32x4_t)__builtin_arm64_vqmovun_high_v((int8x8_t)__a, (int8x16_t)__b, 34); } + /* vqneg_s8/_s16/_s32: forward the operand (bitcast to int8x8_t where it is not already) to __builtin_arm64_vqneg_v with constants 0/1/2 and cast back to the operand's own type. */ +__ai int8x8_t vqneg_s8(int8x8_t __a) { + return (int8x8_t)__builtin_arm64_vqneg_v(__a, 0); } +__ai int16x4_t vqneg_s16(int16x4_t __a) { + return (int16x4_t)__builtin_arm64_vqneg_v((int8x8_t)__a, 1); } +__ai int32x2_t vqneg_s32(int32x2_t __a) { + return (int32x2_t)__builtin_arm64_vqneg_v((int8x8_t)__a, 2); } +__ai int8x16_t vqnegq_s8(int8x16_t __a) { + 
return (int8x16_t)__builtin_arm64_vqnegq_v(__a, 32); } +__ai int16x8_t vqnegq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_arm64_vqnegq_v((int8x16_t)__a, 33); } +__ai int32x4_t vqnegq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_arm64_vqnegq_v((int8x16_t)__a, 34); } +__ai int64x2_t vqnegq_s64(int64x2_t __a) { + return (int64x2_t)__builtin_arm64_vqnegq_v((int8x16_t)__a, 35); } + +__ai int8_t vqnegb_s8(int8_t __a) { + return (int8_t)__builtin_arm64_vqnegb_s8(__a); } + +__ai int64_t vqnegd_s64(int64_t __a) { + return (int64_t)__builtin_arm64_vqnegd_s64(__a); } + +__ai int16_t vqnegh_s16(int16_t __a) { + return (int16_t)__builtin_arm64_vqnegh_s16(__a); } + +__ai int32_t vqnegs_s32(int32_t __a) { + return (int32_t)__builtin_arm64_vqnegs_s32(__a); } + +__ai int16x4_t vqrdmulh_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqrdmulh_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int16x8_t vqrdmulhq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqrdmulhq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); } + +#define vqrdmulh_lane_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqrdmulh_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vqrdmulhq_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) +#define vqrdmulhq_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); 
int32x2_t __b = (b); \ + vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) + +#define vqrdmulh_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqrdmulh_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vqrdmulhq_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) +#define vqrdmulhq_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) + +__ai int16x4_t vqrdmulh_n_s16(int16x4_t __a, int16_t __b) { + return (int16x4_t)__builtin_arm64_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); } +__ai int32x2_t vqrdmulh_n_s32(int32x2_t __a, int32_t __b) { + return (int32x2_t)__builtin_arm64_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); } +__ai int16x8_t vqrdmulhq_n_s16(int16x8_t __a, int16_t __b) { + return (int16x8_t)__builtin_arm64_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); } +__ai int32x4_t vqrdmulhq_n_s32(int32x4_t __a, int32_t __b) { + return (int32x4_t)__builtin_arm64_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); } + +__ai int8x8_t vqrshl_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vqrshl_v(__a, __b, 0); } +__ai int16x4_t vqrshl_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqrshl_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t 
vqrshl_u8(uint8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vqrshl_v((int8x8_t)__a, __b, 16); } +__ai uint16x4_t vqrshl_u16(uint16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vqrshl_u32(uint32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vqrshlq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vqrshlq_v(__a, __b, 32); } +__ai int16x8_t vqrshlq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqrshlq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vqrshlq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vqrshlq_u8(uint8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vqrshlq_v((int8x16_t)__a, __b, 48); } +__ai uint16x8_t vqrshlq_u16(uint16x8_t __a, int16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqrshlq_u32(uint32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vqrshlq_u64(uint64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int8_t vqrshlb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_arm64_vqrshlb_s8(__a, __b); } +__ai uint8_t vqrshlb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_arm64_vqrshlb_u8(__a, __b); } + +__ai int64_t vqrshld_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vqrshld_s64(__a, __b); } +__ai uint64_t vqrshld_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vqrshld_u64(__a, __b); } + +__ai 
int16_t vqrshlh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_arm64_vqrshlh_s16(__a, __b); } +__ai uint16_t vqrshlh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_arm64_vqrshlh_u16(__a, __b); } + +__ai int32_t vqrshls_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_arm64_vqrshls_s32(__a, __b); } +__ai uint32_t vqrshls_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_arm64_vqrshls_u32(__a, __b); } + +__ai int64_t vqrshl_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vqrshl_s64(__a, __b); } +__ai uint64_t vqrshl_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vqrshl_u64(__a, __b); } + +#define vqrshrn_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)__builtin_arm64_vqrshrn_high_n_v(__a, (int8x16_t)__b, __c, 32); }) +#define vqrshrn_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)__builtin_arm64_vqrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 33); }) +#define vqrshrn_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)__builtin_arm64_vqrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 34); }) +#define vqrshrn_high_n_u16(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint16x8_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vqrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 48); }) +#define vqrshrn_high_n_u32(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint32x4_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vqrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 49); }) +#define vqrshrn_high_n_u64(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint64x2_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vqrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 50); }) + +#define vqrshrn_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int8x8_t)__builtin_arm64_vqrshrn_n_v((int8x16_t)__a, __b, 0); }) 
+/* Generated intrinsic wrappers. Each macro copies its vector argument into a typed local inside a GNU statement expression, casts it to an int8 vector type, and passes the remaining macro argument through as written. NOTE(review): the final integer literal looks like a type code that tracks the result type - confirm against the builtin definitions. */ #define vqrshrn_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int16x4_t)__builtin_arm64_vqrshrn_n_v((int8x16_t)__a, __b, 1); }) +#define vqrshrn_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int32x2_t)__builtin_arm64_vqrshrn_n_v((int8x16_t)__a, __b, 2); }) +#define vqrshrn_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vqrshrn_n_v((int8x16_t)__a, __b, 16); }) +#define vqrshrn_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vqrshrn_n_v((int8x16_t)__a, __b, 17); }) +#define vqrshrn_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vqrshrn_n_v((int8x16_t)__a, __b, 18); }) +/* vqrshrun_high_n_*: take two vectors (a, b) and __c; the vectors are cast to int8 vector types where needed and passed with __c to __builtin_arm64_vqrshrun_high_n_v. */ +#define vqrshrun_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)__builtin_arm64_vqrshrun_high_n_v(__a, (int8x16_t)__b, __c, 32); }) +#define vqrshrun_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)__builtin_arm64_vqrshrun_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 33); }) +#define vqrshrun_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)__builtin_arm64_vqrshrun_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 34); }) + +#define vqrshrun_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vqrshrun_n_v((int8x16_t)__a, __b, 16); }) +#define vqrshrun_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vqrshrun_n_v((int8x16_t)__a, __b, 17); }) +#define vqrshrun_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vqrshrun_n_v((int8x16_t)__a, __b, 18); }) +/* vqshl_* and vqshlq_*: inline functions forwarding both vector operands to __builtin_arm64_vqshl_v or __builtin_arm64_vqshlq_v; the unsigned vector variants take a signed vector as their second operand. */ +__ai int8x8_t vqshl_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vqshl_v(__a, __b, 0); } +__ai int16x4_t vqshl_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 
1); } +__ai int32x2_t vqshl_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vqshl_u8(uint8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vqshl_v((int8x8_t)__a, __b, 16); } +__ai uint16x4_t vqshl_u16(uint16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vqshl_u32(uint32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vqshlq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vqshlq_v(__a, __b, 32); } +__ai int16x8_t vqshlq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqshlq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vqshlq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vqshlq_u8(uint8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vqshlq_v((int8x16_t)__a, __b, 48); } +__ai uint16x8_t vqshlq_u16(uint16x8_t __a, int16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqshlq_u32(uint32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vqshlq_u64(uint64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } +/* Scalar forms (vqshlb, vqshld, vqshlh, vqshls): each calls a dedicated per-type builtin and passes no trailing type-code argument. */ +__ai int8_t vqshlb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_arm64_vqshlb_s8(__a, __b); } +__ai uint8_t vqshlb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_arm64_vqshlb_u8(__a, __b); } + +__ai int64_t vqshld_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vqshld_s64(__a, 
__b); } +__ai uint64_t vqshld_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vqshld_u64(__a, __b); } + +__ai int16_t vqshlh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_arm64_vqshlh_s16(__a, __b); } +__ai uint16_t vqshlh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_arm64_vqshlh_u16(__a, __b); } + +__ai int32_t vqshls_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_arm64_vqshls_s32(__a, __b); } +__ai uint32_t vqshls_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_arm64_vqshls_u32(__a, __b); } +/* vqshlu_n_* and vqshluq_n_*: the input is a signed vector and the result is cast to the corresponding unsigned vector type. */ +#define vqshlu_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vqshlu_n_v(__a, __b, 16); }) +#define vqshlu_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vqshlu_n_v((int8x8_t)__a, __b, 17); }) +#define vqshlu_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vqshlu_n_v((int8x8_t)__a, __b, 18); }) +#define vqshlu_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (uint64x1_t)__builtin_arm64_vqshlu_n_v((int8x8_t)__a, __b, 19); }) +#define vqshluq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (uint8x16_t)__builtin_arm64_vqshluq_n_v(__a, __b, 48); }) +#define vqshluq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (uint16x8_t)__builtin_arm64_vqshluq_n_v((int8x16_t)__a, __b, 49); }) +#define vqshluq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (uint32x4_t)__builtin_arm64_vqshluq_n_v((int8x16_t)__a, __b, 50); }) +#define vqshluq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (uint64x2_t)__builtin_arm64_vqshluq_n_v((int8x16_t)__a, __b, 51); }) + +__ai int64_t vqshl_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vqshl_s64(__a, __b); } +__ai uint64_t vqshl_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vqshl_u64(__a, __b); } +/* vqshl_n_* and vqshlq_n_*: macro forms that take the second operand (__b) as written instead of as a vector. */ +#define vqshl_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a 
= (a); \ + (int8x8_t)__builtin_arm64_vqshl_n_v(__a, __b, 0); }) +#define vqshl_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16x4_t)__builtin_arm64_vqshl_n_v((int8x8_t)__a, __b, 1); }) +#define vqshl_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32x2_t)__builtin_arm64_vqshl_n_v((int8x8_t)__a, __b, 2); }) +#define vqshl_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64x1_t)__builtin_arm64_vqshl_n_v((int8x8_t)__a, __b, 3); }) +#define vqshl_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vqshl_n_v((int8x8_t)__a, __b, 16); }) +#define vqshl_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vqshl_n_v((int8x8_t)__a, __b, 17); }) +#define vqshl_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vqshl_n_v((int8x8_t)__a, __b, 18); }) +#define vqshl_n_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64x1_t)__builtin_arm64_vqshl_n_v((int8x8_t)__a, __b, 19); }) +#define vqshlq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8x16_t)__builtin_arm64_vqshlq_n_v(__a, __b, 32); }) +#define vqshlq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16x8_t)__builtin_arm64_vqshlq_n_v((int8x16_t)__a, __b, 33); }) +#define vqshlq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32x4_t)__builtin_arm64_vqshlq_n_v((int8x16_t)__a, __b, 34); }) +#define vqshlq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64x2_t)__builtin_arm64_vqshlq_n_v((int8x16_t)__a, __b, 35); }) +#define vqshlq_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8x16_t)__builtin_arm64_vqshlq_n_v((int8x16_t)__a, __b, 48); }) +#define vqshlq_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16x8_t)__builtin_arm64_vqshlq_n_v((int8x16_t)__a, __b, 49); }) +#define vqshlq_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + 
(uint32x4_t)__builtin_arm64_vqshlq_n_v((int8x16_t)__a, __b, 50); }) +#define vqshlq_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64x2_t)__builtin_arm64_vqshlq_n_v((int8x16_t)__a, __b, 51); }) +/* vqshrn_high_n_*: (a, b, __c) macros forwarding to __builtin_arm64_vqshrn_high_n_v with both vectors passed as int8 vector types. */ +#define vqshrn_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)__builtin_arm64_vqshrn_high_n_v(__a, (int8x16_t)__b, __c, 32); }) +#define vqshrn_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)__builtin_arm64_vqshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 33); }) +#define vqshrn_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)__builtin_arm64_vqshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 34); }) +#define vqshrn_high_n_u16(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint16x8_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vqshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 48); }) +#define vqshrn_high_n_u32(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint32x4_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vqshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 49); }) +#define vqshrn_high_n_u64(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint64x2_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vqshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 50); }) + +#define vqshrn_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int8x8_t)__builtin_arm64_vqshrn_n_v((int8x16_t)__a, __b, 0); }) +#define vqshrn_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int16x4_t)__builtin_arm64_vqshrn_n_v((int8x16_t)__a, __b, 1); }) +#define vqshrn_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int32x2_t)__builtin_arm64_vqshrn_n_v((int8x16_t)__a, __b, 2); }) +#define vqshrn_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vqshrn_n_v((int8x16_t)__a, __b, 16); }) +#define vqshrn_n_u32(a, __b) __extension__ ({ \ + uint32x4_t 
__a = (a); \ + (uint16x4_t)__builtin_arm64_vqshrn_n_v((int8x16_t)__a, __b, 17); }) +#define vqshrn_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vqshrn_n_v((int8x16_t)__a, __b, 18); }) + +#define vqshrun_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)__builtin_arm64_vqshrun_high_n_v(__a, (int8x16_t)__b, __c, 32); }) +#define vqshrun_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)__builtin_arm64_vqshrun_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 33); }) +#define vqshrun_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)__builtin_arm64_vqshrun_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 34); }) + +#define vqshrun_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vqshrun_n_v((int8x16_t)__a, __b, 16); }) +#define vqshrun_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vqshrun_n_v((int8x16_t)__a, __b, 17); }) +#define vqshrun_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vqshrun_n_v((int8x16_t)__a, __b, 18); }) + +__ai int8_t vqsubb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_arm64_vqsubb_s8(__a, __b); } +__ai uint8_t vqsubb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_arm64_vqsubb_u8(__a, __b); } + +__ai int16_t vqsubh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_arm64_vqsubh_s16(__a, __b); } +__ai uint16_t vqsubh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_arm64_vqsubh_u16(__a, __b); } +/* vqtblNq_*: for N of 2 or more the xN struct argument is unpacked into its val[] members, followed by the remaining vector argument __b. */ +__ai uint8x16_t vqtbl1q_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vqtbl1q_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai int8x16_t vqtbl1q_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vqtbl1q_v(__a, __b, 32); } +__ai poly8x16_t vqtbl1q_p8(poly8x16_t __a, 
poly8x16_t __b) { + return (poly8x16_t)__builtin_arm64_vqtbl1q_v((int8x16_t)__a, (int8x16_t)__b, 36); } + +__ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vqtbl2q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__b, 48); } +__ai int8x16_t vqtbl2q_s8(int8x16x2_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vqtbl2q_v(__a.val[0], __a.val[1], __b, 32); } +__ai poly8x16_t vqtbl2q_p8(poly8x16x2_t __a, poly8x16_t __b) { + return (poly8x16_t)__builtin_arm64_vqtbl2q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__b, 36); } + +__ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vqtbl3q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__b, 48); } +__ai int8x16_t vqtbl3q_s8(int8x16x3_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vqtbl3q_v(__a.val[0], __a.val[1], __a.val[2], __b, 32); } +__ai poly8x16_t vqtbl3q_p8(poly8x16x3_t __a, poly8x16_t __b) { + return (poly8x16_t)__builtin_arm64_vqtbl3q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__b, 36); } + +__ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vqtbl4q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__a.val[3], (int8x16_t)__b, 48); } +__ai int8x16_t vqtbl4q_s8(int8x16x4_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vqtbl4q_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 32); } +__ai poly8x16_t vqtbl4q_p8(poly8x16x4_t __a, poly8x16_t __b) { + return (poly8x16_t)__builtin_arm64_vqtbl4q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__a.val[3], (int8x16_t)__b, 36); } +/* vqtbxNq_*: same argument unpacking as the vqtblNq_* wrappers, with an extra leading vector argument __a. */ +__ai uint8x16_t vqtbx1q_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return (uint8x16_t)__builtin_arm64_vqtbx1q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); } +__ai int8x16_t 
vqtbx1q_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { + return (int8x16_t)__builtin_arm64_vqtbx1q_v(__a, __b, __c, 32); } +__ai poly8x16_t vqtbx1q_p8(poly8x16_t __a, poly8x16_t __b, poly8x16_t __c) { + return (poly8x16_t)__builtin_arm64_vqtbx1q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 36); } + +__ai uint8x16_t vqtbx2q_u8(uint8x16_t __a, uint8x16x2_t __b, uint8x16_t __c) { + return (uint8x16_t)__builtin_arm64_vqtbx2q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__c, 48); } +__ai int8x16_t vqtbx2q_s8(int8x16_t __a, int8x16x2_t __b, int8x16_t __c) { + return (int8x16_t)__builtin_arm64_vqtbx2q_v(__a, __b.val[0], __b.val[1], __c, 32); } +__ai poly8x16_t vqtbx2q_p8(poly8x16_t __a, poly8x16x2_t __b, poly8x16_t __c) { + return (poly8x16_t)__builtin_arm64_vqtbx2q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__c, 36); } + +__ai uint8x16_t vqtbx3q_u8(uint8x16_t __a, uint8x16x3_t __b, uint8x16_t __c) { + return (uint8x16_t)__builtin_arm64_vqtbx3q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__c, 48); } +__ai int8x16_t vqtbx3q_s8(int8x16_t __a, int8x16x3_t __b, int8x16_t __c) { + return (int8x16_t)__builtin_arm64_vqtbx3q_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 32); } +__ai poly8x16_t vqtbx3q_p8(poly8x16_t __a, poly8x16x3_t __b, poly8x16_t __c) { + return (poly8x16_t)__builtin_arm64_vqtbx3q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__c, 36); } + +__ai uint8x16_t vqtbx4q_u8(uint8x16_t __a, uint8x16x4_t __b, uint8x16_t __c) { + return (uint8x16_t)__builtin_arm64_vqtbx4q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], (int8x16_t)__c, 48); } +__ai int8x16_t vqtbx4q_s8(int8x16_t __a, int8x16x4_t __b, int8x16_t __c) { + return (int8x16_t)__builtin_arm64_vqtbx4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 32); } +__ai 
poly8x16_t vqtbx4q_p8(poly8x16_t __a, poly8x16x4_t __b, poly8x16_t __c) { + return (poly8x16_t)__builtin_arm64_vqtbx4q_v((int8x16_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], (int8x16_t)__c, 36); } +/* vraddhn_*: two-operand wrappers over __builtin_arm64_vraddhn_v; the vraddhn_high_* forms take an extra leading vector __a and call __builtin_arm64_vraddhn_high_v. */ +__ai int8x8_t vraddhn_s16(int16x8_t __a, int16x8_t __b) { + return (int8x8_t)__builtin_arm64_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } +__ai int16x4_t vraddhn_s32(int32x4_t __a, int32x4_t __b) { + return (int16x4_t)__builtin_arm64_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } +__ai int32x2_t vraddhn_s64(int64x2_t __a, int64x2_t __b) { + return (int32x2_t)__builtin_arm64_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } +__ai uint8x8_t vraddhn_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint8x8_t)__builtin_arm64_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } +__ai uint16x4_t vraddhn_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint16x4_t)__builtin_arm64_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } +__ai uint32x2_t vraddhn_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint32x2_t)__builtin_arm64_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } + +__ai int8x16_t vraddhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) { + return (int8x16_t)__builtin_arm64_vraddhn_high_v(__a, (int8x16_t)__b, (int8x16_t)__c, 32); } +__ai int16x8_t vraddhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) { + return (int16x8_t)__builtin_arm64_vraddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 33); } +__ai int32x4_t vraddhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) { + return (int32x4_t)__builtin_arm64_vraddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } +__ai uint8x16_t vraddhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return (uint8x16_t)__builtin_arm64_vraddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); } +__ai uint16x8_t vraddhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return 
(uint16x8_t)__builtin_arm64_vraddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); } +__ai uint32x4_t vraddhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return (uint32x4_t)__builtin_arm64_vraddhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai int8x8_t vrbit_s8(int8x8_t __a) { + return (int8x8_t)__builtin_arm64_vrbit_v(__a, 0); } +__ai uint8x8_t vrbit_u8(uint8x8_t __a) { + return (uint8x8_t)__builtin_arm64_vrbit_v((int8x8_t)__a, 16); } +__ai int8x16_t vrbitq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_arm64_vrbitq_v(__a, 32); } +__ai uint8x16_t vrbitq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_arm64_vrbitq_v((int8x16_t)__a, 48); } +/* vrecpe and vrecps: the vector forms go through the generic _v builtins with a trailing integer argument; vrecpsd_f64 and vrecpss_f32 call per-type scalar builtins directly. */ +__ai float32x2_t vrecpe_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrecpe_v((int8x8_t)__a, 7); } +__ai uint32x2_t vrecpe_u32(uint32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vrecpe_v((int8x8_t)__a, 18); } +__ai float32x4_t vrecpeq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrecpeq_v((int8x16_t)__a, 39); } +__ai float64x2_t vrecpeq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrecpeq_v((int8x16_t)__a, 40); } +__ai uint32x4_t vrecpeq_u32(uint32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vrecpeq_v((int8x16_t)__a, 50); } + +__ai float32x2_t vrecps_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vrecpsq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vrecpsq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float64_t vrecpsd_f64(float64_t __a, float64_t __b) { + return (float64_t)__builtin_arm64_vrecpsd_f64(__a, __b); } + +__ai float32_t vrecpss_f32(float32_t __a, float32_t __b) { + return (float32_t)__builtin_arm64_vrecpss_f32(__a, __b); } + 
+/* vreinterpret_DST_SRC family: each function returns its single argument through one C-style cast to the destination vector type named in the function; no builtin is called. NOTE(review): presumably a same-size bit reinterpretation rather than a value conversion - confirm against the vector typedefs earlier in the header. */ __ai int8x8_t vreinterpret_s8_s16(int16x4_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_s32(int32x2_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_s64(int64x1_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_f16(float16x4_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_f32(float32x2_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __a) { + return (int8x8_t)__a; } +__ai int16x4_t vreinterpret_s16_s8(int8x8_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_s32(int32x2_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_s64(int64x1_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_f16(float16x4_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_f32(float32x2_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __a) { + return (int16x4_t)__a; } +__ai int32x2_t vreinterpret_s32_s8(int8x8_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_s16(int16x4_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_s64(int64x1_t __a) { + return (int32x2_t)__a; } 
+__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_f16(float16x4_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_f32(float32x2_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __a) { + return (int32x2_t)__a; } +__ai int64x1_t vreinterpret_s64_s8(int8x8_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_s16(int16x4_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_s32(int32x2_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_f16(float16x4_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_f32(float32x2_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __a) { + return (int64x1_t)__a; } +__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __a) 
{ + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __a) { + return (uint8x8_t)__a; } +__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __a) { + return (uint16x4_t)__a; } +__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __a) { + return 
(uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __a) { + return (uint32x2_t)__a; } +__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __a) { + return (uint64x1_t)__a; } +__ai float16x4_t vreinterpret_f16_s8(int8x8_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_s16(int16x4_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_s32(int32x2_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_s64(int64x1_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_f32(float32x2_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t 
vreinterpret_f16_p8(poly8x8_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __a) { + return (float16x4_t)__a; } +__ai float32x2_t vreinterpret_f32_s8(int8x8_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_s16(int16x4_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_s32(int32x2_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_s64(int64x1_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_f16(float16x4_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __a) { + return (float32x2_t)__a; } +__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __a) { + return (poly8x8_t)__a; } +__ai poly16x4_t 
vreinterpret_p16_s8(int8x8_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __a) { + return (poly16x4_t)__a; } +__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __a) { + return (int8x16_t)__a; } +__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t 
vreinterpretq_s16_s64(int64x2_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __a) { + return (int16x8_t)__a; } +__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __a) { + return (int32x4_t)__a; } +__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t 
vreinterpretq_s64_u16(uint16x8_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __a) { + return (int64x2_t)__a; } +__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __a) { + return (uint8x16_t)__a; } +__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __a) { + return (uint16x8_t)__a; } +__ai 
uint16x8_t vreinterpretq_u16_u64(uint64x2_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __a) { + return (uint16x8_t)__a; } +__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __a) { + return (uint32x4_t)__a; } +__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __a) { + 
return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __a) { + return (uint64x2_t)__a; } +__ai float16x8_t vreinterpretq_f16_s8(int8x16_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __a) { + return (float16x8_t)__a; } +__ai float32x4_t vreinterpretq_f32_s8(int8x16_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_f16(float16x8_t __a) { + return 
(float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __a) { + return (float32x4_t)__a; } +__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __a) { + return (poly8x16_t)__a; } +__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t 
vreinterpretq_p16_p8(poly8x16_t __a) { + return (poly16x8_t)__a; } + +__ai int8x8_t vrev16_s8(int8x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai uint8x8_t vrev16_u8(uint8x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai poly8x8_t vrev16_p8(poly8x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai int8x16_t vrev16q_s8(int8x16_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } +__ai uint8x16_t vrev16q_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } +__ai poly8x16_t vrev16q_p8(poly8x16_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } + +__ai int8x8_t vrev32_s8(int8x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai int16x4_t vrev32_s16(int16x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai uint8x8_t vrev32_u8(uint8x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai uint16x4_t vrev32_u16(uint16x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai poly8x8_t vrev32_p8(poly8x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai poly16x4_t vrev32_p16(poly16x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai int8x16_t vrev32q_s8(int8x16_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } +__ai int16x8_t vrev32q_s16(int16x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai uint8x16_t vrev32q_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } +__ai uint16x8_t vrev32q_u16(uint16x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 
5, 4, 7, 6); } +__ai poly8x16_t vrev32q_p8(poly8x16_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } +__ai poly16x8_t vrev32q_p16(poly16x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } + +__ai int8x8_t vrev64_s8(int8x8_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } +__ai int16x4_t vrev64_s16(int16x4_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } +__ai int32x2_t vrev64_s32(int32x2_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0); } +__ai uint8x8_t vrev64_u8(uint8x8_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } +__ai uint16x4_t vrev64_u16(uint16x4_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } +__ai uint32x2_t vrev64_u32(uint32x2_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0); } +__ai poly8x8_t vrev64_p8(poly8x8_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } +__ai poly16x4_t vrev64_p16(poly16x4_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } +__ai float32x2_t vrev64_f32(float32x2_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0); } +__ai int8x16_t vrev64q_s8(int8x16_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } +__ai int16x8_t vrev64q_s16(int16x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai int32x4_t vrev64q_s32(int32x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai uint8x16_t vrev64q_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } +__ai uint16x8_t vrev64q_u16(uint16x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai uint32x4_t vrev64q_u32(uint32x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai poly8x16_t vrev64q_p8(poly8x16_t __a) { + 
return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } +__ai poly16x8_t vrev64q_p16(poly16x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai float32x4_t vrev64q_f32(float32x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } + +__ai int8x8_t vrhadd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vrhadd_v(__a, __b, 0); } +__ai int16x4_t vrhadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vrhadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vrhadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vrhadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vrhadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vrhaddq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vrhaddq_v(__a, __b, 32); } +__ai int16x8_t vrhaddq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vrhaddq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vrhaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vrhaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vrhaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 
50); } + +__ai float32x2_t vrnd_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrnd_v((int8x8_t)__a, 7); } +__ai float32x4_t vrndq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrndq_v((int8x16_t)__a, 39); } +__ai float64x2_t vrndq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrndq_v((int8x16_t)__a, 40); } + +__ai float32x2_t vrnda_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrnda_v((int8x8_t)__a, 7); } +__ai float32x4_t vrndaq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrndaq_v((int8x16_t)__a, 39); } +__ai float64x2_t vrndaq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrndaq_v((int8x16_t)__a, 40); } + +__ai float32x2_t vrndm_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrndm_v((int8x8_t)__a, 7); } +__ai float32x4_t vrndmq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrndmq_v((int8x16_t)__a, 39); } +__ai float64x2_t vrndmq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrndmq_v((int8x16_t)__a, 40); } + +__ai float32x2_t vrndn_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrndn_v((int8x8_t)__a, 7); } +__ai float32x4_t vrndnq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrndnq_v((int8x16_t)__a, 39); } +__ai float64x2_t vrndnq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrndnq_v((int8x16_t)__a, 40); } + +__ai float32x2_t vrndp_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrndp_v((int8x8_t)__a, 7); } +__ai float32x4_t vrndpq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrndpq_v((int8x16_t)__a, 39); } +__ai float64x2_t vrndpq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrndpq_v((int8x16_t)__a, 40); } + +__ai float32x2_t vrndx_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrndx_v((int8x8_t)__a, 7); } +__ai float32x4_t vrndxq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrndxq_v((int8x16_t)__a, 39); } 
+/* vrndxq_f64: casts the float64x2_t argument to int8x16_t, passes it with the constant 40 to __builtin_arm64_vrndxq_v, and casts the result back to float64x2_t. */__ai float64x2_t vrndxq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrndxq_v((int8x16_t)__a, 40); } +/* vrndz_f32, vrndzq_f32, vrndzq_f64: same shape as above using __builtin_arm64_vrndz_v for the 64-bit vector and __builtin_arm64_vrndzq_v for the 128-bit vectors; the trailing constants here are 7 (float32x2_t), 39 (float32x4_t) and 40 (float64x2_t). NOTE(review): the constant presumably tells the generic builtin which element type it is operating on, and the name suggests round-toward-zero -- neither is shown by this code; confirm against the NEON intrinsics reference. */ +__ai float32x2_t vrndz_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrndz_v((int8x8_t)__a, 7); } +__ai float32x4_t vrndzq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrndzq_v((int8x16_t)__a, 39); } +__ai float64x2_t vrndzq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrndzq_v((int8x16_t)__a, 40); } +/* vrshl_<t> and vrshlq_<t>: two-operand wrappers over __builtin_arm64_vrshl_v (int8x8_t operands) and __builtin_arm64_vrshlq_v (int8x16_t operands). Arguments of any other vector type are cast to int8x8_t or int8x16_t first and the result is cast back to the type of __a. The second operand is a signed vector even in the unsigned variants. The last argument is a per-type constant: 0, 1, 2 for s8, s16, s32; 16, 17, 18 for u8, u16, u32; 32 to 35 for the q-form s8 to s64; 48 to 51 for the q-form u8 to u64. */ +__ai int8x8_t vrshl_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vrshl_v(__a, __b, 0); } +__ai int16x4_t vrshl_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vrshl_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vrshl_u8(uint8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vrshl_v((int8x8_t)__a, __b, 16); } +__ai uint16x4_t vrshl_u16(uint16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vrshl_u32(uint32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vrshlq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vrshlq_v(__a, __b, 32); } +__ai int16x8_t vrshlq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vrshlq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vrshlq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vrshlq_u8(uint8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vrshlq_v((int8x16_t)__a, __b, 48); } +__ai uint16x8_t vrshlq_u16(uint16x8_t __a, 
int16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vrshlq_u32(uint32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vrshlq_u64(uint64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } +/* vrshld_s64 and vrshld_u64: scalar forms; both arguments go straight to __builtin_arm64_vrshld_s64 or __builtin_arm64_vrshld_u64 and the result is cast to the return type. NOTE(review): vrshld_u64 declares __b as uint64_t, whereas the vector unsigned variants above take a signed second operand -- confirm this is intended. */ +__ai int64_t vrshld_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vrshld_s64(__a, __b); } +__ai uint64_t vrshld_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vrshld_u64(__a, __b); } +/* vrshl_s64 and vrshl_u64: declared here on plain int64_t and uint64_t values and forwarded to __builtin_arm64_vrshl_s64 and __builtin_arm64_vrshl_u64, unlike the other vrshl_<t> wrappers, which take vectors. NOTE(review): other 64-bit-lane intrinsics in this header use int64x1_t and uint64x1_t; check whether the scalar signature is deliberate. */ +__ai int64_t vrshl_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vrshl_s64(__a, __b); } +__ai uint64_t vrshl_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vrshl_u64(__a, __b); } +/* vrshrn_high_n_<t>(a, b, __c): GNU statement-expression macros. a and b are each copied once into typed locals __a (64-bit vector) and __b (128-bit vector); __c is handed to the builtin as written. The value of the macro is __builtin_arm64_vrshrn_high_n_v(__a, __b, __c, code), with the operands cast to int8x8_t and int8x16_t where needed, cast to the 128-bit vector type that has the same element type as a. The codes used are 32, 33, 34 for the signed forms and 48, 49, 50 for the unsigned forms. NOTE(review): __c presumably has to be a compile-time constant shift count -- not shown here. */ +#define vrshrn_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)__builtin_arm64_vrshrn_high_n_v(__a, (int8x16_t)__b, __c, 32); }) +#define vrshrn_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)__builtin_arm64_vrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 33); }) +#define vrshrn_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)__builtin_arm64_vrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 34); }) +#define vrshrn_high_n_u16(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint16x8_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 48); }) +#define vrshrn_high_n_u32(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint32x4_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 49); }) +#define vrshrn_high_n_u64(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint64x2_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vrshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 50); }) + 
+#define vrshrn_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int8x8_t)__builtin_arm64_vrshrn_n_v((int8x16_t)__a, __b, 0); }) +#define vrshrn_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int16x4_t)__builtin_arm64_vrshrn_n_v((int8x16_t)__a, __b, 1); }) +#define vrshrn_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int32x2_t)__builtin_arm64_vrshrn_n_v((int8x16_t)__a, __b, 2); }) +#define vrshrn_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vrshrn_n_v((int8x16_t)__a, __b, 16); }) +#define vrshrn_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vrshrn_n_v((int8x16_t)__a, __b, 17); }) +#define vrshrn_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vrshrn_n_v((int8x16_t)__a, __b, 18); }) + +#define vrshr_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8x8_t)__builtin_arm64_vrshr_n_v(__a, __b, 0); }) +#define vrshr_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16x4_t)__builtin_arm64_vrshr_n_v((int8x8_t)__a, __b, 1); }) +#define vrshr_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32x2_t)__builtin_arm64_vrshr_n_v((int8x8_t)__a, __b, 2); }) +#define vrshr_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64x1_t)__builtin_arm64_vrshr_n_v((int8x8_t)__a, __b, 3); }) +#define vrshr_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vrshr_n_v((int8x8_t)__a, __b, 16); }) +#define vrshr_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vrshr_n_v((int8x8_t)__a, __b, 17); }) +#define vrshr_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vrshr_n_v((int8x8_t)__a, __b, 18); }) +#define vrshr_n_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64x1_t)__builtin_arm64_vrshr_n_v((int8x8_t)__a, __b, 19); }) +#define vrshrq_n_s8(a, __b) 
__extension__ ({ \ + int8x16_t __a = (a); \ + (int8x16_t)__builtin_arm64_vrshrq_n_v(__a, __b, 32); }) +#define vrshrq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16x8_t)__builtin_arm64_vrshrq_n_v((int8x16_t)__a, __b, 33); }) +#define vrshrq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32x4_t)__builtin_arm64_vrshrq_n_v((int8x16_t)__a, __b, 34); }) +#define vrshrq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64x2_t)__builtin_arm64_vrshrq_n_v((int8x16_t)__a, __b, 35); }) +#define vrshrq_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8x16_t)__builtin_arm64_vrshrq_n_v((int8x16_t)__a, __b, 48); }) +#define vrshrq_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16x8_t)__builtin_arm64_vrshrq_n_v((int8x16_t)__a, __b, 49); }) +#define vrshrq_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32x4_t)__builtin_arm64_vrshrq_n_v((int8x16_t)__a, __b, 50); }) +#define vrshrq_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64x2_t)__builtin_arm64_vrshrq_n_v((int8x16_t)__a, __b, 51); }) + +__ai float32x2_t vrsqrte_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vrsqrte_v((int8x8_t)__a, 7); } +__ai uint32x2_t vrsqrte_u32(uint32x2_t __a) { + return (uint32x2_t)__builtin_arm64_vrsqrte_v((int8x8_t)__a, 18); } +__ai float32x4_t vrsqrteq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vrsqrteq_v((int8x16_t)__a, 39); } +__ai float64x2_t vrsqrteq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vrsqrteq_v((int8x16_t)__a, 40); } +__ai uint32x4_t vrsqrteq_u32(uint32x4_t __a) { + return (uint32x4_t)__builtin_arm64_vrsqrteq_v((int8x16_t)__a, 50); } + +__ai float32x2_t vrsqrts_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vrsqrtsq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vrsqrtsq_v((int8x16_t)__a, 
(int8x16_t)__b, 39); } +__ai float64x2_t vrsqrtsq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float64_t vrsqrtsd_f64(float64_t __a, float64_t __b) { + return (float64_t)__builtin_arm64_vrsqrtsd_f64(__a, __b); } + +__ai float32_t vrsqrtss_f32(float32_t __a, float32_t __b) { + return (float32_t)__builtin_arm64_vrsqrtss_f32(__a, __b); } + +#define vrsra_n_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_arm64_vrsra_n_v(__a, __b, __c, 0); }) +#define vrsra_n_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_arm64_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vrsra_n_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_arm64_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vrsra_n_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_arm64_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vrsra_n_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_arm64_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vrsra_n_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_arm64_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vrsra_n_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_arm64_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vrsra_n_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_arm64_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vrsraq_n_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_arm64_vrsraq_n_v(__a, __b, __c, 32); 
}) +#define vrsraq_n_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_arm64_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vrsraq_n_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_arm64_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vrsraq_n_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_arm64_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vrsraq_n_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vrsraq_n_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vrsraq_n_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vrsraq_n_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_arm64_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) + +__ai int8x8_t vrsubhn_s16(int16x8_t __a, int16x8_t __b) { + return (int8x8_t)__builtin_arm64_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } +__ai int16x4_t vrsubhn_s32(int32x4_t __a, int32x4_t __b) { + return (int16x4_t)__builtin_arm64_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } +__ai int32x2_t vrsubhn_s64(int64x2_t __a, int64x2_t __b) { + return (int32x2_t)__builtin_arm64_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } +__ai uint8x8_t vrsubhn_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint8x8_t)__builtin_arm64_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } +__ai uint16x4_t vrsubhn_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint16x4_t)__builtin_arm64_vrsubhn_v((int8x16_t)__a, 
(int8x16_t)__b, 17); } +__ai uint32x2_t vrsubhn_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint32x2_t)__builtin_arm64_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } + +__ai int8x16_t vrsubhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) { + return (int8x16_t)__builtin_arm64_vrsubhn_high_v(__a, (int8x16_t)__b, (int8x16_t)__c, 32); } +__ai int16x8_t vrsubhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) { + return (int16x8_t)__builtin_arm64_vrsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 33); } +__ai int32x4_t vrsubhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) { + return (int32x4_t)__builtin_arm64_vrsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } +__ai uint8x16_t vrsubhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return (uint8x16_t)__builtin_arm64_vrsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); } +__ai uint16x8_t vrsubhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint16x8_t)__builtin_arm64_vrsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); } +__ai uint32x4_t vrsubhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return (uint32x4_t)__builtin_arm64_vrsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +#define vset_lane_u8(a, b, __c) __extension__ ({ \ + uint8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_arm64_vset_lane_i8(__a, (int8x8_t)__b, __c); }) +#define vset_lane_u16(a, b, __c) __extension__ ({ \ + uint16_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_arm64_vset_lane_i16(__a, (int16x4_t)__b, __c); }) +#define vset_lane_u32(a, b, __c) __extension__ ({ \ + uint32_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_arm64_vset_lane_i32(__a, (int32x2_t)__b, __c); }) +#define vset_lane_s8(a, b, __c) __extension__ ({ \ + int8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_arm64_vset_lane_i8(__a, __b, __c); }) +#define vset_lane_s16(a, b, __c) 
__extension__ ({ \ + int16_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_arm64_vset_lane_i16(__a, __b, __c); }) +#define vset_lane_s32(a, b, __c) __extension__ ({ \ + int32_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_arm64_vset_lane_i32(__a, __b, __c); }) +#define vset_lane_p8(a, b, __c) __extension__ ({ \ + poly8_t __a = (a); poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_arm64_vset_lane_i8(__a, (int8x8_t)__b, __c); }) +#define vset_lane_p16(a, b, __c) __extension__ ({ \ + poly16_t __a = (a); poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_arm64_vset_lane_i16(__a, (int16x4_t)__b, __c); }) +#define vset_lane_f16(a, b, __c) __extension__ ({ \ + float16_t __a = (a); float16x4_t __b = (b); \ + (float16x4_t)__builtin_arm64_vset_lane_f16(__a, __b, __c); }) +#define vset_lane_f32(a, b, __c) __extension__ ({ \ + float32_t __a = (a); float32x2_t __b = (b); \ + (float32x2_t)__builtin_arm64_vset_lane_f32(__a, __b, __c); }) +#define vset_lane_f64(a, b, __c) __extension__ ({ \ + float64_t __a = (a); float64x1_t __b = (b); \ + (float64x1_t)__builtin_arm64_vset_lane_f64(__a, __b, __c); }) +#define vsetq_lane_u8(a, b, __c) __extension__ ({ \ + uint8_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vsetq_lane_i8(__a, (int8x16_t)__b, __c); }) +#define vsetq_lane_u16(a, b, __c) __extension__ ({ \ + uint16_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vsetq_lane_i16(__a, (int16x8_t)__b, __c); }) +#define vsetq_lane_u32(a, b, __c) __extension__ ({ \ + uint32_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vsetq_lane_i32(__a, (int32x4_t)__b, __c); }) +#define vsetq_lane_s8(a, b, __c) __extension__ ({ \ + int8_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_arm64_vsetq_lane_i8(__a, __b, __c); }) +#define vsetq_lane_s16(a, b, __c) __extension__ ({ \ + int16_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_arm64_vsetq_lane_i16(__a, __b, __c); }) +#define vsetq_lane_s32(a, b, __c) 
__extension__ ({ \ + int32_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_arm64_vsetq_lane_i32(__a, __b, __c); }) +#define vsetq_lane_p8(a, b, __c) __extension__ ({ \ + poly8_t __a = (a); poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_arm64_vsetq_lane_i8(__a, (int8x16_t)__b, __c); }) +#define vsetq_lane_p16(a, b, __c) __extension__ ({ \ + poly16_t __a = (a); poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_arm64_vsetq_lane_i16(__a, (int16x8_t)__b, __c); }) +#define vsetq_lane_f16(a, b, __c) __extension__ ({ \ + float16_t __a = (a); float16x8_t __b = (b); \ + (float16x8_t)__builtin_arm64_vsetq_lane_f16(__a, __b, __c); }) +#define vsetq_lane_f32(a, b, __c) __extension__ ({ \ + float32_t __a = (a); float32x4_t __b = (b); \ + (float32x4_t)__builtin_arm64_vsetq_lane_f32(__a, __b, __c); }) +#define vsetq_lane_f64(a, b, __c) __extension__ ({ \ + float64_t __a = (a); float64x2_t __b = (b); \ + (float64x2_t)__builtin_arm64_vsetq_lane_f64(__a, __b, __c); }) +#define vset_lane_s64(a, b, __c) __extension__ ({ \ + int64_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_arm64_vset_lane_i64(__a, __b, __c); }) +#define vset_lane_u64(a, b, __c) __extension__ ({ \ + uint64_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_arm64_vset_lane_i64(__a, (int64x1_t)__b, __c); }) +#define vsetq_lane_s64(a, b, __c) __extension__ ({ \ + int64_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_arm64_vsetq_lane_i64(__a, __b, __c); }) +#define vsetq_lane_u64(a, b, __c) __extension__ ({ \ + uint64_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_arm64_vsetq_lane_i64(__a, (int64x2_t)__b, __c); }) + +__ai uint32x4_t vsha1cq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_arm64_vsha1cq_u32((int32x4_t)__a, __b, (int32x4_t)__c); } + +__ai uint32_t vsha1h_u32(uint32_t __a) { + return (uint32_t)__builtin_arm64_vsha1h_u32(__a); } + +__ai uint32x4_t vsha1mq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) { + return 
(uint32x4_t)__builtin_arm64_vsha1mq_u32((int32x4_t)__a, __b, (int32x4_t)__c); } + +__ai uint32x4_t vsha1pq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_arm64_vsha1pq_u32((int32x4_t)__a, __b, (int32x4_t)__c); } + +__ai uint32x4_t vsha1su0q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_arm64_vsha1su0q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai uint32x4_t vsha1su1q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vsha1su1q_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai uint32x4_t vsha256hq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_arm64_vsha256hq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai uint32x4_t vsha256h2q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_arm64_vsha256h2q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai uint32x4_t vsha256su0q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vsha256su0q_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai uint32x4_t vsha256su1q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_arm64_vsha256su1q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai int8x8_t vshl_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vshl_v(__a, __b, 0); } +__ai int16x4_t vshl_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vshl_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vshl_u8(uint8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vshl_v((int8x8_t)__a, __b, 16); } +__ai uint16x4_t vshl_u16(uint16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t 
vshl_u32(uint32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vshlq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vshlq_v(__a, __b, 32); } +__ai int16x8_t vshlq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vshlq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vshlq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vshlq_u8(uint8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vshlq_v((int8x16_t)__a, __b, 48); } +__ai uint16x8_t vshlq_u16(uint16x8_t __a, int16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vshlq_u32(uint32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vshlq_u64(uint64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int64_t vshld_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vshld_s64(__a, __b); } +__ai uint64_t vshld_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vshld_u64(__a, __b); } + +#define vshll_high_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int16x8_t)__builtin_arm64_vshll_high_n_v(__a, __b, 33); }) +#define vshll_high_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int32x4_t)__builtin_arm64_vshll_high_n_v((int8x16_t)__a, __b, 34); }) +#define vshll_high_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int64x2_t)__builtin_arm64_vshll_high_n_v((int8x16_t)__a, __b, 35); }) +#define vshll_high_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + 
(uint16x8_t)__builtin_arm64_vshll_high_n_v((int8x16_t)__a, __b, 49); }) +#define vshll_high_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint32x4_t)__builtin_arm64_vshll_high_n_v((int8x16_t)__a, __b, 50); }) +#define vshll_high_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint64x2_t)__builtin_arm64_vshll_high_n_v((int8x16_t)__a, __b, 51); }) + +#define vshll_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int16x8_t)__builtin_arm64_vshll_n_v(__a, __b, 33); }) +#define vshll_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int32x4_t)__builtin_arm64_vshll_n_v((int8x8_t)__a, __b, 34); }) +#define vshll_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int64x2_t)__builtin_arm64_vshll_n_v((int8x8_t)__a, __b, 35); }) +#define vshll_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint16x8_t)__builtin_arm64_vshll_n_v((int8x8_t)__a, __b, 49); }) +#define vshll_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint32x4_t)__builtin_arm64_vshll_n_v((int8x8_t)__a, __b, 50); }) +#define vshll_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint64x2_t)__builtin_arm64_vshll_n_v((int8x8_t)__a, __b, 51); }) + +__ai int64_t vshl_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vshl_s64(__a, __b); } +__ai uint64_t vshl_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vshl_u64(__a, __b); } + +#define vshl_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8x8_t)__builtin_arm64_vshl_n_v(__a, __b, 0); }) +#define vshl_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16x4_t)__builtin_arm64_vshl_n_v((int8x8_t)__a, __b, 1); }) +#define vshl_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32x2_t)__builtin_arm64_vshl_n_v((int8x8_t)__a, __b, 2); }) +#define vshl_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64x1_t)__builtin_arm64_vshl_n_v((int8x8_t)__a, __b, 3); }) +#define vshl_n_u8(a, __b) 
__extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vshl_n_v((int8x8_t)__a, __b, 16); }) +#define vshl_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vshl_n_v((int8x8_t)__a, __b, 17); }) +#define vshl_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vshl_n_v((int8x8_t)__a, __b, 18); }) +#define vshl_n_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64x1_t)__builtin_arm64_vshl_n_v((int8x8_t)__a, __b, 19); }) +#define vshlq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8x16_t)__builtin_arm64_vshlq_n_v(__a, __b, 32); }) +#define vshlq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16x8_t)__builtin_arm64_vshlq_n_v((int8x16_t)__a, __b, 33); }) +#define vshlq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32x4_t)__builtin_arm64_vshlq_n_v((int8x16_t)__a, __b, 34); }) +#define vshlq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64x2_t)__builtin_arm64_vshlq_n_v((int8x16_t)__a, __b, 35); }) +#define vshlq_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8x16_t)__builtin_arm64_vshlq_n_v((int8x16_t)__a, __b, 48); }) +#define vshlq_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16x8_t)__builtin_arm64_vshlq_n_v((int8x16_t)__a, __b, 49); }) +#define vshlq_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32x4_t)__builtin_arm64_vshlq_n_v((int8x16_t)__a, __b, 50); }) +#define vshlq_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64x2_t)__builtin_arm64_vshlq_n_v((int8x16_t)__a, __b, 51); }) + +#define vshrn_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)__builtin_arm64_vshrn_high_n_v(__a, (int8x16_t)__b, __c, 32); }) +#define vshrn_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)__builtin_arm64_vshrn_high_n_v((int8x8_t)__a, 
(int8x16_t)__b, __c, 33); }) +#define vshrn_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)__builtin_arm64_vshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 34); }) +#define vshrn_high_n_u16(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint16x8_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 48); }) +#define vshrn_high_n_u32(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint32x4_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 49); }) +#define vshrn_high_n_u64(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint64x2_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vshrn_high_n_v((int8x8_t)__a, (int8x16_t)__b, __c, 50); }) + +#define vshrn_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int8x8_t)__builtin_arm64_vshrn_n_v((int8x16_t)__a, __b, 0); }) +#define vshrn_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int16x4_t)__builtin_arm64_vshrn_n_v((int8x16_t)__a, __b, 1); }) +#define vshrn_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int32x2_t)__builtin_arm64_vshrn_n_v((int8x16_t)__a, __b, 2); }) +#define vshrn_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vshrn_n_v((int8x16_t)__a, __b, 16); }) +#define vshrn_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vshrn_n_v((int8x16_t)__a, __b, 17); }) +#define vshrn_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vshrn_n_v((int8x16_t)__a, __b, 18); }) + +#define vshr_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8x8_t)__builtin_arm64_vshr_n_v(__a, __b, 0); }) +#define vshr_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16x4_t)__builtin_arm64_vshr_n_v((int8x8_t)__a, __b, 1); }) +#define vshr_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + 
(int32x2_t)__builtin_arm64_vshr_n_v((int8x8_t)__a, __b, 2); }) +#define vshr_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64x1_t)__builtin_arm64_vshr_n_v((int8x8_t)__a, __b, 3); }) +#define vshr_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8x8_t)__builtin_arm64_vshr_n_v((int8x8_t)__a, __b, 16); }) +#define vshr_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16x4_t)__builtin_arm64_vshr_n_v((int8x8_t)__a, __b, 17); }) +#define vshr_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32x2_t)__builtin_arm64_vshr_n_v((int8x8_t)__a, __b, 18); }) +#define vshr_n_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64x1_t)__builtin_arm64_vshr_n_v((int8x8_t)__a, __b, 19); }) +#define vshrq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8x16_t)__builtin_arm64_vshrq_n_v(__a, __b, 32); }) +#define vshrq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16x8_t)__builtin_arm64_vshrq_n_v((int8x16_t)__a, __b, 33); }) +#define vshrq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32x4_t)__builtin_arm64_vshrq_n_v((int8x16_t)__a, __b, 34); }) +#define vshrq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64x2_t)__builtin_arm64_vshrq_n_v((int8x16_t)__a, __b, 35); }) +#define vshrq_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8x16_t)__builtin_arm64_vshrq_n_v((int8x16_t)__a, __b, 48); }) +#define vshrq_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16x8_t)__builtin_arm64_vshrq_n_v((int8x16_t)__a, __b, 49); }) +#define vshrq_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32x4_t)__builtin_arm64_vshrq_n_v((int8x16_t)__a, __b, 50); }) +#define vshrq_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64x2_t)__builtin_arm64_vshrq_n_v((int8x16_t)__a, __b, 51); }) + +#define vsli_n_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + 
(int8x8_t)__builtin_arm64_vsli_n_v(__a, __b, __c, 0); }) +#define vsli_n_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vsli_n_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vsli_n_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vsli_n_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vsli_n_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vsli_n_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vsli_n_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vsli_n_p8(a, b, __c) __extension__ ({ \ + poly8x8_t __a = (a); poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) +#define vsli_n_p16(a, b, __c) __extension__ ({ \ + poly16x4_t __a = (a); poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_arm64_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) +#define vsliq_n_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_arm64_vsliq_n_v(__a, __b, __c, 32); }) +#define vsliq_n_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) 
+/* NOTE(review): this text appears to be a collapsed patch hunk; each " + " marks the start of an added line. vsliq_n_* wrappers: copy both vector operands into typed locals, cast them to int8x16_t, and pass them with __c and a per-type integer code to __builtin_arm64_vsliq_n_v, casting the result back to the operand type. __c is substituted textually rather than copied into a local (presumably so it stays an integer constant for the builtin -- confirm). The code's encoding is defined by the builtin and is not visible in this chunk. */#define vsliq_n_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vsliq_n_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vsliq_n_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vsliq_n_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vsliq_n_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vsliq_n_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) +#define vsliq_n_p8(a, b, __c) __extension__ ({ \ + poly8x16_t __a = (a); poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) +#define vsliq_n_p16(a, b, __c) __extension__ ({ \ + poly16x8_t __a = (a); poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_arm64_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) +/* vsqrt_f32 / vsqrtq_f32 / vsqrtq_f64: __ai functions that cast the float vector to int8x8_t or int8x16_t, call __builtin_arm64_vsqrt_v / __builtin_arm64_vsqrtq_v with code 7, 39 or 40, and cast the result back to the argument's vector type. */ +__ai float32x2_t vsqrt_f32(float32x2_t __a) { + return (float32x2_t)__builtin_arm64_vsqrt_v((int8x8_t)__a, 7); } +__ai float32x4_t vsqrtq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_arm64_vsqrtq_v((int8x16_t)__a, 39); } +__ai float64x2_t vsqrtq_f64(float64x2_t __a) { + return (float64x2_t)__builtin_arm64_vsqrtq_v((int8x16_t)__a, 40); } +/* vsra_n_* / vsraq_n_*: same wrapper shape as the vsliq_n_* macros, forwarding to __builtin_arm64_vsra_n_v (operands cast to int8x8_t) or __builtin_arm64_vsraq_n_v (operands cast to int8x16_t). The s8 variants pass their locals uncast because they already have that type. NOTE(review): presumably NEON shift-right-and-accumulate by the immediate __c -- confirm against the builtin. */ +#define vsra_n_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_arm64_vsra_n_v(__a, __b, __c, 0); }) +#define vsra_n_s16(a, b,
__c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_arm64_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vsra_n_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_arm64_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vsra_n_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_arm64_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vsra_n_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_arm64_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vsra_n_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_arm64_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vsra_n_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_arm64_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vsra_n_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_arm64_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vsraq_n_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_arm64_vsraq_n_v(__a, __b, __c, 32); }) +#define vsraq_n_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_arm64_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vsraq_n_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_arm64_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vsraq_n_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_arm64_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vsraq_n_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a);
uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vsraq_n_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vsraq_n_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vsraq_n_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_arm64_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) +/* vsri_n_* / vsriq_n_*: same wrapper shape again, forwarding to __builtin_arm64_vsri_n_v / __builtin_arm64_vsriq_n_v; unlike the vsra family this one also defines poly8/poly16 variants (codes 4, 5, 36, 37). NOTE(review): presumably NEON shift-right-and-insert by the immediate __c -- confirm against the builtin. */ +#define vsri_n_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_arm64_vsri_n_v(__a, __b, __c, 0); }) +#define vsri_n_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vsri_n_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vsri_n_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vsri_n_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vsri_n_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vsri_n_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vsri_n_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ +
(uint64x1_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vsri_n_p8(a, b, __c) __extension__ ({ \ + poly8x8_t __a = (a); poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) +#define vsri_n_p16(a, b, __c) __extension__ ({ \ + poly16x4_t __a = (a); poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_arm64_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) +#define vsriq_n_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_arm64_vsriq_n_v(__a, __b, __c, 32); }) +#define vsriq_n_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vsriq_n_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vsriq_n_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vsriq_n_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vsriq_n_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vsriq_n_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vsriq_n_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) +#define vsriq_n_p8(a, b, __c) __extension__ ({ \ + poly8x16_t __a = (a); poly8x16_t __b = (b); \ +
(poly8x16_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) +#define vsriq_n_p16(a, b, __c) __extension__ ({ \ + poly16x8_t __a = (a); poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_arm64_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) +/* vst1q_* / vst1_*: the vector b is copied into a typed local, cast to int8x16_t / int8x8_t, and passed with a per-type code to __builtin_arm64_vst1q_v / __builtin_arm64_vst1_v. __a is substituted textually as the first argument (presumably the destination pointer -- confirm). The builtin's result is not cast to any vector type here. */ +#define vst1q_u8(__a, b) __extension__ ({ \ + uint8x16_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 48); }) +#define vst1q_u16(__a, b) __extension__ ({ \ + uint16x8_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 49); }) +#define vst1q_u32(__a, b) __extension__ ({ \ + uint32x4_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 50); }) +#define vst1q_u64(__a, b) __extension__ ({ \ + uint64x2_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 51); }) +#define vst1q_s8(__a, b) __extension__ ({ \ + int8x16_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, __b, 32); }) +#define vst1q_s16(__a, b) __extension__ ({ \ + int16x8_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 33); }) +#define vst1q_s32(__a, b) __extension__ ({ \ + int32x4_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 34); }) +#define vst1q_s64(__a, b) __extension__ ({ \ + int64x2_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 35); }) +#define vst1q_f16(__a, b) __extension__ ({ \ + float16x8_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 38); }) +#define vst1q_f32(__a, b) __extension__ ({ \ + float32x4_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 39); }) +#define vst1q_f64(__a, b) __extension__ ({ \ + float64x2_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 40); }) +#define vst1q_p8(__a, b) __extension__ ({ \ + poly8x16_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 36); }) +#define vst1q_p16(__a, b) __extension__ ({ \ + poly16x8_t __b = (b); \ + __builtin_arm64_vst1q_v(__a, (int8x16_t)__b, 37); }) +#define vst1_u8(__a, b) __extension__ ({ \ + uint8x8_t __b = (b); \ +
__builtin_arm64_vst1_v(__a, (int8x8_t)__b, 16); }) +#define vst1_u16(__a, b) __extension__ ({ \ + uint16x4_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 17); }) +#define vst1_u32(__a, b) __extension__ ({ \ + uint32x2_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 18); }) +#define vst1_u64(__a, b) __extension__ ({ \ + uint64x1_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 19); }) +#define vst1_s8(__a, b) __extension__ ({ \ + int8x8_t __b = (b); \ + __builtin_arm64_vst1_v(__a, __b, 0); }) +#define vst1_s16(__a, b) __extension__ ({ \ + int16x4_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 1); }) +#define vst1_s32(__a, b) __extension__ ({ \ + int32x2_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 2); }) +#define vst1_s64(__a, b) __extension__ ({ \ + int64x1_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 3); }) +#define vst1_f16(__a, b) __extension__ ({ \ + float16x4_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 6); }) +#define vst1_f64(__a, b) __extension__ ({ \ + float64x1_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 8); }) +#define vst1_f32(__a, b) __extension__ ({ \ + float32x2_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 7); }) +#define vst1_p8(__a, b) __extension__ ({ \ + poly8x8_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 4); }) +#define vst1_p16(__a, b) __extension__ ({ \ + poly16x4_t __b = (b); \ + __builtin_arm64_vst1_v(__a, (int8x8_t)__b, 5); }) +/* vst1q_lane_* / vst1_lane_*: same shape as vst1q_* / vst1_*, with an extra textually substituted immediate __c forwarded to __builtin_arm64_vst1q_lane_v / __builtin_arm64_vst1_lane_v ahead of the type code (presumably the lane index -- confirm). */ +#define vst1q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 48); }) +#define vst1q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 49); }) +#define vst1q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 50); }) +#define vst1q_lane_u64(__a, b, __c)
__extension__ ({ \ + uint64x2_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 51); }) +#define vst1q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, __b, __c, 32); }) +#define vst1q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 33); }) +#define vst1q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 34); }) +#define vst1q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 35); }) +#define vst1q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 38); }) +#define vst1q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 39); }) +#define vst1q_lane_f64(__a, b, __c) __extension__ ({ \ + float64x2_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 40); }) +#define vst1q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 36); }) +#define vst1q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8_t __b = (b); \ + __builtin_arm64_vst1q_lane_v(__a, (int8x16_t)__b, __c, 37); }) +#define vst1_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 16); }) +#define vst1_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 17); }) +#define vst1_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 18); }) +#define vst1_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x1_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 19); })
+#define vst1_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, __b, __c, 0); }) +#define vst1_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 1); }) +#define vst1_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 2); }) +#define vst1_lane_s64(__a, b, __c) __extension__ ({ \ + int64x1_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 3); }) +#define vst1_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 6); }) +#define vst1_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 7); }) +#define vst1_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 4); }) +#define vst1_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4_t __b = (b); \ + __builtin_arm64_vst1_lane_v(__a, (int8x8_t)__b, __c, 5); }) + +#define vst2q_u8(__a, b) __extension__ ({ \ + uint8x16x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 48); }) +#define vst2q_u16(__a, b) __extension__ ({ \ + uint16x8x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 49); }) +#define vst2q_u32(__a, b) __extension__ ({ \ + uint32x4x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 50); }) +#define vst2q_u64(__a, b) __extension__ ({ \ + uint64x2x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 51); }) +#define vst2q_s8(__a, b) __extension__ ({ \ + int8x16x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, __b.val[0], __b.val[1], 32); }) +#define vst2q_s16(__a, b) __extension__ ({ \ + int16x8x2_t __b = (b); \ + 
__builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 33); }) +#define vst2q_s32(__a, b) __extension__ ({ \ + int32x4x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 34); }) +#define vst2q_s64(__a, b) __extension__ ({ \ + int64x2x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 35); }) +#define vst2q_f16(__a, b) __extension__ ({ \ + float16x8x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 38); }) +#define vst2q_f32(__a, b) __extension__ ({ \ + float32x4x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 39); }) +#define vst2q_f64(__a, b) __extension__ ({ \ + float64x2x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 40); }) +#define vst2q_p8(__a, b) __extension__ ({ \ + poly8x16x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 36); }) +#define vst2q_p16(__a, b) __extension__ ({ \ + poly16x8x2_t __b = (b); \ + __builtin_arm64_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 37); }) +#define vst2_u8(__a, b) __extension__ ({ \ + uint8x8x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 16); }) +#define vst2_u16(__a, b) __extension__ ({ \ + uint16x4x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 17); }) +#define vst2_u32(__a, b) __extension__ ({ \ + uint32x2x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 18); }) +#define vst2_u64(__a, b) __extension__ ({ \ + uint64x1x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 19); }) +#define vst2_s8(__a, b) __extension__ ({ \ + int8x8x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, __b.val[0], __b.val[1], 0); }) +#define vst2_s16(__a, b) __extension__ ({ \ + 
int16x4x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 1); }) +#define vst2_s32(__a, b) __extension__ ({ \ + int32x2x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 2); }) +#define vst2_s64(__a, b) __extension__ ({ \ + int64x1x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 3); }) +#define vst2_f16(__a, b) __extension__ ({ \ + float16x4x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 6); }) +#define vst2_f32(__a, b) __extension__ ({ \ + float32x2x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 7); }) +#define vst2_f64(__a, b) __extension__ ({ \ + float64x1x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 8); }) +#define vst2_p8(__a, b) __extension__ ({ \ + poly8x8x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 4); }) +#define vst2_p16(__a, b) __extension__ ({ \ + poly16x4x2_t __b = (b); \ + __builtin_arm64_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 5); }) + +#define vst2q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 48); }) +#define vst2q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); }) +#define vst2q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); }) +#define vst2q_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x2x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 51); }) +#define vst2q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16x2_t __b = (b); \ + 
__builtin_arm64_vst2q_lane_v(__a, __b.val[0], __b.val[1], __c, 32); }) +#define vst2q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); }) +#define vst2q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); }) +#define vst2q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 35); }) +#define vst2q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); }) +#define vst2q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); }) +#define vst2q_lane_f64(__a, b, __c) __extension__ ({ \ + float64x2x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 40); }) +#define vst2q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 36); }) +#define vst2q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x2_t __b = (b); \ + __builtin_arm64_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); }) +#define vst2_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); }) +#define vst2_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); }) +#define vst2_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], 
(int8x8_t)__b.val[1], __c, 18); }) +#define vst2_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, __b.val[0], __b.val[1], __c, 0); }) +#define vst2_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); }) +#define vst2_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); }) +#define vst2_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); }) +#define vst2_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); }) +#define vst2_lane_f64(__a, b, __c) __extension__ ({ \ + float64x1x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 8); }) +#define vst2_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); }) +#define vst2_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x2_t __b = (b); \ + __builtin_arm64_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); }) + +#define vst3q_u8(__a, b) __extension__ ({ \ + uint8x16x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 48); }) +#define vst3q_u16(__a, b) __extension__ ({ \ + uint16x8x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 49); }) +#define vst3q_u32(__a, b) __extension__ ({ \ + uint32x4x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 50); }) +#define vst3q_u64(__a, b) __extension__ ({ \ + 
uint64x2x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 51); }) +#define vst3q_s8(__a, b) __extension__ ({ \ + int8x16x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, __b.val[0], __b.val[1], __b.val[2], 32); }) +#define vst3q_s16(__a, b) __extension__ ({ \ + int16x8x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 33); }) +#define vst3q_s32(__a, b) __extension__ ({ \ + int32x4x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 34); }) +#define vst3q_s64(__a, b) __extension__ ({ \ + int64x2x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 35); }) +#define vst3q_f16(__a, b) __extension__ ({ \ + float16x8x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 38); }) +#define vst3q_f32(__a, b) __extension__ ({ \ + float32x4x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 39); }) +#define vst3q_f64(__a, b) __extension__ ({ \ + float64x2x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 40); }) +#define vst3q_p8(__a, b) __extension__ ({ \ + poly8x16x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 36); }) +#define vst3q_p16(__a, b) __extension__ ({ \ + poly16x8x3_t __b = (b); \ + __builtin_arm64_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 37); }) +#define vst3_u8(__a, b) __extension__ ({ \ + uint8x8x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 16); }) +#define vst3_u16(__a, b) __extension__ ({ \ + uint16x4x3_t __b = (b); \ + 
__builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 17); }) +#define vst3_u32(__a, b) __extension__ ({ \ + uint32x2x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 18); }) +#define vst3_u64(__a, b) __extension__ ({ \ + uint64x1x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 19); }) +#define vst3_s8(__a, b) __extension__ ({ \ + int8x8x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, __b.val[0], __b.val[1], __b.val[2], 0); }) +#define vst3_s16(__a, b) __extension__ ({ \ + int16x4x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 1); }) +#define vst3_s32(__a, b) __extension__ ({ \ + int32x2x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 2); }) +#define vst3_s64(__a, b) __extension__ ({ \ + int64x1x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 3); }) +#define vst3_f16(__a, b) __extension__ ({ \ + float16x4x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 6); }) +#define vst3_f32(__a, b) __extension__ ({ \ + float32x2x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 7); }) +#define vst3_f64(__a, b) __extension__ ({ \ + float64x1x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 8); }) +#define vst3_p8(__a, b) __extension__ ({ \ + poly8x8x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 4); }) +#define vst3_p16(__a, b) __extension__ ({ \ + poly16x4x3_t __b = (b); \ + __builtin_arm64_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 
5); }) + +#define vst3q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 48); }) +#define vst3q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); }) +#define vst3q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); }) +#define vst3q_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x2x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 51); }) +#define vst3q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 32); }) +#define vst3q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); }) +#define vst3q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); }) +#define vst3q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 35); }) +#define vst3q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); }) +#define vst3q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); }) 
+#define vst3q_lane_f64(__a, b, __c) __extension__ ({ \ + float64x2x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 40); }) +#define vst3q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 36); }) +#define vst3q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x3_t __b = (b); \ + __builtin_arm64_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); }) +#define vst3_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); }) +#define vst3_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); }) +#define vst3_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); }) +#define vst3_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); }) +#define vst3_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); }) +#define vst3_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); }) +#define vst3_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); }) +#define vst3_lane_f32(__a, b, __c) __extension__ ({ \ 
+ float32x2x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); }) +#define vst3_lane_f64(__a, b, __c) __extension__ ({ \ + float64x1x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 8); }) +#define vst3_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); }) +#define vst3_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x3_t __b = (b); \ + __builtin_arm64_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); }) + +#define vst4q_u8(__a, b) __extension__ ({ \ + uint8x16x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 48); }) +#define vst4q_u16(__a, b) __extension__ ({ \ + uint16x8x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 49); }) +#define vst4q_u32(__a, b) __extension__ ({ \ + uint32x4x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 50); }) +#define vst4q_u64(__a, b) __extension__ ({ \ + uint64x2x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 51); }) +#define vst4q_s8(__a, b) __extension__ ({ \ + int8x16x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 32); }) +#define vst4q_s16(__a, b) __extension__ ({ \ + int16x8x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 33); }) +#define vst4q_s32(__a, b) __extension__ ({ \ + int32x4x4_t __b = (b); \ + 
__builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 34); }) +#define vst4q_s64(__a, b) __extension__ ({ \ + int64x2x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 35); }) +#define vst4q_f16(__a, b) __extension__ ({ \ + float16x8x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 38); }) +#define vst4q_f32(__a, b) __extension__ ({ \ + float32x4x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 39); }) +#define vst4q_f64(__a, b) __extension__ ({ \ + float64x2x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 40); }) +#define vst4q_p8(__a, b) __extension__ ({ \ + poly8x16x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 36); }) +#define vst4q_p16(__a, b) __extension__ ({ \ + poly16x8x4_t __b = (b); \ + __builtin_arm64_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 37); }) +#define vst4_u8(__a, b) __extension__ ({ \ + uint8x8x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 16); }) +#define vst4_u16(__a, b) __extension__ ({ \ + uint16x4x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 17); }) +#define vst4_u32(__a, b) __extension__ ({ \ + uint32x2x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 18); }) +#define vst4_u64(__a, b) __extension__ ({ \ 
+ uint64x1x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 19); }) +#define vst4_s8(__a, b) __extension__ ({ \ + int8x8x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 0); }) +#define vst4_s16(__a, b) __extension__ ({ \ + int16x4x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 1); }) +#define vst4_s32(__a, b) __extension__ ({ \ + int32x2x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 2); }) +#define vst4_s64(__a, b) __extension__ ({ \ + int64x1x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 3); }) +#define vst4_f16(__a, b) __extension__ ({ \ + float16x4x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 6); }) +#define vst4_f32(__a, b) __extension__ ({ \ + float32x2x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 7); }) +#define vst4_f64(__a, b) __extension__ ({ \ + float64x1x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 8); }) +#define vst4_p8(__a, b) __extension__ ({ \ + poly8x8x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 4); }) +#define vst4_p16(__a, b) __extension__ ({ \ + poly16x4x4_t __b = (b); \ + __builtin_arm64_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 5); }) + +#define vst4q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16x4_t __b = (b); \ + 
__builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 48); }) +#define vst4q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); }) +#define vst4q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); }) +#define vst4q_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x2x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 51); }) +#define vst4q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 32); }) +#define vst4q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); }) +#define vst4q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); }) +#define vst4q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 35); }) +#define vst4q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); }) +#define vst4q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x4_t __b = (b); \ + 
__builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); }) +#define vst4q_lane_f64(__a, b, __c) __extension__ ({ \ + float64x2x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 40); }) +#define vst4q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 36); }) +#define vst4q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x4_t __b = (b); \ + __builtin_arm64_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); }) +#define vst4_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); }) +#define vst4_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); }) +#define vst4_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); }) +#define vst4_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); }) +#define vst4_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); }) +#define vst4_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], 
(int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); }) +#define vst4_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); }) +#define vst4_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); }) +#define vst4_lane_f64(__a, b, __c) __extension__ ({ \ + float64x1x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 8); }) +#define vst4_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); }) +#define vst4_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x4_t __b = (b); \ + __builtin_arm64_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); }) + +__ai int8x8_t vsub_s8(int8x8_t __a, int8x8_t __b) { + return __a - __b; } +__ai int16x4_t vsub_s16(int16x4_t __a, int16x4_t __b) { + return __a - __b; } +__ai int32x2_t vsub_s32(int32x2_t __a, int32x2_t __b) { + return __a - __b; } +__ai int64x1_t vsub_s64(int64x1_t __a, int64x1_t __b) { + return __a - __b; } +__ai float32x2_t vsub_f32(float32x2_t __a, float32x2_t __b) { + return __a - __b; } +__ai uint8x8_t vsub_u8(uint8x8_t __a, uint8x8_t __b) { + return __a - __b; } +__ai uint16x4_t vsub_u16(uint16x4_t __a, uint16x4_t __b) { + return __a - __b; } +__ai uint32x2_t vsub_u32(uint32x2_t __a, uint32x2_t __b) { + return __a - __b; } +__ai uint64x1_t vsub_u64(uint64x1_t __a, uint64x1_t __b) { + return __a - __b; } +__ai int8x16_t vsubq_s8(int8x16_t __a, int8x16_t __b) { + return __a - __b; } +__ai int16x8_t 
vsubq_s16(int16x8_t __a, int16x8_t __b) { + return __a - __b; } +__ai int32x4_t vsubq_s32(int32x4_t __a, int32x4_t __b) { + return __a - __b; } +__ai int64x2_t vsubq_s64(int64x2_t __a, int64x2_t __b) { + return __a - __b; } +__ai float32x4_t vsubq_f32(float32x4_t __a, float32x4_t __b) { + return __a - __b; } +__ai float64x2_t vsubq_f64(float64x2_t __a, float64x2_t __b) { + return __a - __b; } +__ai uint8x16_t vsubq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a - __b; } +__ai uint16x8_t vsubq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a - __b; } +__ai uint32x4_t vsubq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a - __b; } +__ai uint64x2_t vsubq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a - __b; } + +__ai int64_t vsubd_s64(int64_t __a, int64_t __b) { + return __a - __b; } +__ai uint64_t vsubd_u64(uint64_t __a, uint64_t __b) { + return __a - __b; } + +__ai int8x8_t vsubhn_s16(int16x8_t __a, int16x8_t __b) { + return (int8x8_t)__builtin_arm64_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } +__ai int16x4_t vsubhn_s32(int32x4_t __a, int32x4_t __b) { + return (int16x4_t)__builtin_arm64_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } +__ai int32x2_t vsubhn_s64(int64x2_t __a, int64x2_t __b) { + return (int32x2_t)__builtin_arm64_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } +__ai uint8x8_t vsubhn_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint8x8_t)__builtin_arm64_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } +__ai uint16x4_t vsubhn_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint16x4_t)__builtin_arm64_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } +__ai uint32x2_t vsubhn_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint32x2_t)__builtin_arm64_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } + +__ai int8x16_t vsubhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) { + return (int8x16_t)__builtin_arm64_vsubhn_high_v(__a, (int8x16_t)__b, (int8x16_t)__c, 32); } +__ai int16x8_t vsubhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) { + 
return (int16x8_t)__builtin_arm64_vsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 33); } +__ai int32x4_t vsubhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) { + return (int32x4_t)__builtin_arm64_vsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } +__ai uint8x16_t vsubhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return (uint8x16_t)__builtin_arm64_vsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); } +__ai uint16x8_t vsubhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint16x8_t)__builtin_arm64_vsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); } +__ai uint32x4_t vsubhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return (uint32x4_t)__builtin_arm64_vsubhn_high_v((int8x8_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai int16x8_t vsubl_s8(int8x8_t __a, int8x8_t __b) { + return vmovl_s8(__a) - vmovl_s8(__b); } +__ai int32x4_t vsubl_s16(int16x4_t __a, int16x4_t __b) { + return vmovl_s16(__a) - vmovl_s16(__b); } +__ai int64x2_t vsubl_s32(int32x2_t __a, int32x2_t __b) { + return vmovl_s32(__a) - vmovl_s32(__b); } +__ai uint16x8_t vsubl_u8(uint8x8_t __a, uint8x8_t __b) { + return vmovl_u8(__a) - vmovl_u8(__b); } +__ai uint32x4_t vsubl_u16(uint16x4_t __a, uint16x4_t __b) { + return vmovl_u16(__a) - vmovl_u16(__b); } +__ai uint64x2_t vsubl_u32(uint32x2_t __a, uint32x2_t __b) { + return vmovl_u32(__a) - vmovl_u32(__b); } + +__ai int16x8_t vsubl_high_s8(int8x16_t __a, int8x16_t __b) { + return (int16x8_t)__builtin_arm64_vsubl_high_v(__a, __b, 33); } +__ai int32x4_t vsubl_high_s16(int16x8_t __a, int16x8_t __b) { + return (int32x4_t)__builtin_arm64_vsubl_high_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vsubl_high_s32(int32x4_t __a, int32x4_t __b) { + return (int64x2_t)__builtin_arm64_vsubl_high_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint16x8_t vsubl_high_u8(uint8x16_t __a, uint8x16_t __b) { + return 
(uint16x8_t)__builtin_arm64_vsubl_high_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vsubl_high_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint32x4_t)__builtin_arm64_vsubl_high_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vsubl_high_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint64x2_t)__builtin_arm64_vsubl_high_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int16x8_t vsubw_s8(int16x8_t __a, int8x8_t __b) { + return __a - vmovl_s8(__b); } +__ai int32x4_t vsubw_s16(int32x4_t __a, int16x4_t __b) { + return __a - vmovl_s16(__b); } +__ai int64x2_t vsubw_s32(int64x2_t __a, int32x2_t __b) { + return __a - vmovl_s32(__b); } +__ai uint16x8_t vsubw_u8(uint16x8_t __a, uint8x8_t __b) { + return __a - vmovl_u8(__b); } +__ai uint32x4_t vsubw_u16(uint32x4_t __a, uint16x4_t __b) { + return __a - vmovl_u16(__b); } +__ai uint64x2_t vsubw_u32(uint64x2_t __a, uint32x2_t __b) { + return __a - vmovl_u32(__b); } + +__ai int16x8_t vsubw_high_s8(int8x16_t __a, int8x16_t __b) { + return (int16x8_t)__builtin_arm64_vsubw_high_v(__a, __b, 33); } +__ai int32x4_t vsubw_high_s16(int16x8_t __a, int16x8_t __b) { + return (int32x4_t)__builtin_arm64_vsubw_high_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vsubw_high_s32(int32x4_t __a, int32x4_t __b) { + return (int64x2_t)__builtin_arm64_vsubw_high_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint16x8_t vsubw_high_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint16x8_t)__builtin_arm64_vsubw_high_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vsubw_high_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint32x4_t)__builtin_arm64_vsubw_high_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vsubw_high_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint64x2_t)__builtin_arm64_vsubw_high_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai uint8x8_t vtbl1_u8(uint8x8_t __a, uint8x8_t __b) __attribute__((unavailable)); +__ai int8x8_t vtbl1_s8(int8x8_t __a, int8x8_t __b) 
__attribute__((unavailable)); +__ai poly8x8_t vtbl1_p8(poly8x8_t __a, poly8x8_t __b) __attribute__((unavailable)); + +__ai uint8x8_t vtbl1q_u8(uint8x16_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtbl1q_v((int8x16_t)__a, (int8x8_t)__b, 16); } +__ai int8x8_t vtbl1q_s8(int8x16_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vtbl1q_v(__a, __b, 0); } +__ai poly8x8_t vtbl1q_p8(poly8x16_t __a, poly8x8_t __b) { + return (poly8x8_t)__builtin_arm64_vtbl1q_v((int8x16_t)__a, (int8x8_t)__b, 4); } + +__ai uint8x8_t vtbl2_u8(uint8x8x2_t __a, uint8x8_t __b) __attribute__((unavailable)); +__ai int8x8_t vtbl2_s8(int8x8x2_t __a, int8x8_t __b) __attribute__((unavailable)); +__ai poly8x8_t vtbl2_p8(poly8x8x2_t __a, poly8x8_t __b) __attribute__((unavailable)); + +__ai uint8x8_t vtbl2q_u8(uint8x16x2_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtbl2q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x8_t)__b, 16); } +__ai int8x8_t vtbl2q_s8(int8x16x2_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vtbl2q_v(__a.val[0], __a.val[1], __b, 0); } +__ai poly8x8_t vtbl2q_p8(poly8x16x2_t __a, poly8x8_t __b) { + return (poly8x8_t)__builtin_arm64_vtbl2q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x8_t)__b, 4); } + +__ai uint8x8_t vtbl3_u8(uint8x8x3_t __a, uint8x8_t __b) __attribute__((unavailable)); +__ai int8x8_t vtbl3_s8(int8x8x3_t __a, int8x8_t __b) __attribute__((unavailable)); +__ai poly8x8_t vtbl3_p8(poly8x8x3_t __a, poly8x8_t __b) __attribute__((unavailable)); + +__ai uint8x8_t vtbl3q_u8(uint8x16x3_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtbl3q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x8_t)__b, 16); } +__ai int8x8_t vtbl3q_s8(int8x16x3_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vtbl3q_v(__a.val[0], __a.val[1], __a.val[2], __b, 0); } +__ai poly8x8_t vtbl3q_p8(poly8x16x3_t __a, poly8x8_t __b) { + return (poly8x8_t)__builtin_arm64_vtbl3q_v((int8x16_t)__a.val[0], 
(int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x8_t)__b, 4); } + +__ai uint8x8_t vtbl4_u8(uint8x8x4_t __a, uint8x8_t __b) __attribute__((unavailable)); +__ai int8x8_t vtbl4_s8(int8x8x4_t __a, int8x8_t __b) __attribute__((unavailable)); +__ai poly8x8_t vtbl4_p8(poly8x8x4_t __a, poly8x8_t __b) __attribute__((unavailable)); + +__ai uint8x8_t vtbl4q_u8(uint8x16x4_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtbl4q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__a.val[3], (int8x8_t)__b, 16); } +__ai int8x8_t vtbl4q_s8(int8x16x4_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vtbl4q_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 0); } +__ai poly8x8_t vtbl4q_p8(poly8x16x4_t __a, poly8x8_t __b) { + return (poly8x8_t)__builtin_arm64_vtbl4q_v((int8x16_t)__a.val[0], (int8x16_t)__a.val[1], (int8x16_t)__a.val[2], (int8x16_t)__a.val[3], (int8x8_t)__b, 4); } + +__ai uint8x8_t vtbx1_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) __attribute__((unavailable)); +__ai int8x8_t vtbx1_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) __attribute__((unavailable)); +__ai poly8x8_t vtbx1_p8(poly8x8_t __a, poly8x8_t __b, poly8x8_t __c) __attribute__((unavailable)); + +__ai uint8x8_t vtbx1q_u8(uint8x8_t __a, uint8x16_t __b, uint8x8_t __c) { + return (uint8x8_t)__builtin_arm64_vtbx1q_v((int8x8_t)__a, (int8x16_t)__b, (int8x8_t)__c, 16); } +__ai int8x8_t vtbx1q_s8(int8x8_t __a, int8x16_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_arm64_vtbx1q_v(__a, __b, __c, 0); } +__ai poly8x8_t vtbx1q_p8(poly8x8_t __a, poly8x16_t __b, poly8x8_t __c) { + return (poly8x8_t)__builtin_arm64_vtbx1q_v((int8x8_t)__a, (int8x16_t)__b, (int8x8_t)__c, 4); } + +__ai uint8x8_t vtbx2_u8(uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) __attribute__((unavailable)); +__ai int8x8_t vtbx2_s8(int8x8_t __a, int8x8x2_t __b, int8x8_t __c) __attribute__((unavailable)); +__ai poly8x8_t vtbx2_p8(poly8x8_t __a, poly8x8x2_t __b, poly8x8_t __c) 
__attribute__((unavailable)); + +__ai uint8x8_t vtbx2q_u8(uint8x8_t __a, uint8x16x2_t __b, uint8x8_t __c) { + return (uint8x8_t)__builtin_arm64_vtbx2q_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x8_t)__c, 16); } +__ai int8x8_t vtbx2q_s8(int8x8_t __a, int8x16x2_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_arm64_vtbx2q_v(__a, __b.val[0], __b.val[1], __c, 0); } +__ai poly8x8_t vtbx2q_p8(poly8x8_t __a, poly8x16x2_t __b, poly8x8_t __c) { + return (poly8x8_t)__builtin_arm64_vtbx2q_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x8_t)__c, 4); } + +__ai uint8x8_t vtbx3_u8(uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) __attribute__((unavailable)); +__ai int8x8_t vtbx3_s8(int8x8_t __a, int8x8x3_t __b, int8x8_t __c) __attribute__((unavailable)); +__ai poly8x8_t vtbx3_p8(poly8x8_t __a, poly8x8x3_t __b, poly8x8_t __c) __attribute__((unavailable)); + +__ai uint8x8_t vtbx3q_u8(uint8x8_t __a, uint8x16x3_t __b, uint8x8_t __c) { + return (uint8x8_t)__builtin_arm64_vtbx3q_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x8_t)__c, 16); } +__ai int8x8_t vtbx3q_s8(int8x8_t __a, int8x16x3_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_arm64_vtbx3q_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); } +__ai poly8x8_t vtbx3q_p8(poly8x8_t __a, poly8x16x3_t __b, poly8x8_t __c) { + return (poly8x8_t)__builtin_arm64_vtbx3q_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x8_t)__c, 4); } + +__ai uint8x8_t vtbx4_u8(uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) __attribute__((unavailable)); +__ai int8x8_t vtbx4_s8(int8x8_t __a, int8x8x4_t __b, int8x8_t __c) __attribute__((unavailable)); +__ai poly8x8_t vtbx4_p8(poly8x8_t __a, poly8x8x4_t __b, poly8x8_t __c) __attribute__((unavailable)); + +__ai uint8x8_t vtbx4q_u8(uint8x8_t __a, uint8x16x4_t __b, uint8x8_t __c) { + return (uint8x8_t)__builtin_arm64_vtbx4q_v((int8x8_t)__a, (int8x16_t)__b.val[0], 
(int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], (int8x8_t)__c, 16); } +__ai int8x8_t vtbx4q_s8(int8x8_t __a, int8x16x4_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_arm64_vtbx4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); } +__ai poly8x8_t vtbx4q_p8(poly8x8_t __a, poly8x16x4_t __b, poly8x8_t __c) { + return (poly8x8_t)__builtin_arm64_vtbx4q_v((int8x8_t)__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], (int8x8_t)__c, 4); } + +__ai int8x8x2_t vtrn_s8(int8x8_t __a, int8x8_t __b) { + int8x8x2_t r; __builtin_arm64_vtrn_v(&r, __a, __b, 0); return r; } +__ai int16x4x2_t vtrn_s16(int16x4_t __a, int16x4_t __b) { + int16x4x2_t r; __builtin_arm64_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } +__ai int32x2x2_t vtrn_s32(int32x2_t __a, int32x2_t __b) { + int32x2x2_t r; __builtin_arm64_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } +__ai uint8x8x2_t vtrn_u8(uint8x8_t __a, uint8x8_t __b) { + uint8x8x2_t r; __builtin_arm64_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } +__ai uint16x4x2_t vtrn_u16(uint16x4_t __a, uint16x4_t __b) { + uint16x4x2_t r; __builtin_arm64_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } +__ai uint32x2x2_t vtrn_u32(uint32x2_t __a, uint32x2_t __b) { + uint32x2x2_t r; __builtin_arm64_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; } +__ai float32x2x2_t vtrn_f32(float32x2_t __a, float32x2_t __b) { + float32x2x2_t r; __builtin_arm64_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } +__ai poly8x8x2_t vtrn_p8(poly8x8_t __a, poly8x8_t __b) { + poly8x8x2_t r; __builtin_arm64_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } +__ai poly16x4x2_t vtrn_p16(poly16x4_t __a, poly16x4_t __b) { + poly16x4x2_t r; __builtin_arm64_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } +__ai int8x16x2_t vtrnq_s8(int8x16_t __a, int8x16_t __b) { + int8x16x2_t r; __builtin_arm64_vtrnq_v(&r, __a, __b, 32); return r; } +__ai 
int16x8x2_t vtrnq_s16(int16x8_t __a, int16x8_t __b) { + int16x8x2_t r; __builtin_arm64_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } +__ai int32x4x2_t vtrnq_s32(int32x4_t __a, int32x4_t __b) { + int32x4x2_t r; __builtin_arm64_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } +__ai uint8x16x2_t vtrnq_u8(uint8x16_t __a, uint8x16_t __b) { + uint8x16x2_t r; __builtin_arm64_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } +__ai uint16x8x2_t vtrnq_u16(uint16x8_t __a, uint16x8_t __b) { + uint16x8x2_t r; __builtin_arm64_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } +__ai uint32x4x2_t vtrnq_u32(uint32x4_t __a, uint32x4_t __b) { + uint32x4x2_t r; __builtin_arm64_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; } +__ai float32x4x2_t vtrnq_f32(float32x4_t __a, float32x4_t __b) { + float32x4x2_t r; __builtin_arm64_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } +__ai poly8x16x2_t vtrnq_p8(poly8x16_t __a, poly8x16_t __b) { + poly8x16x2_t r; __builtin_arm64_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } +__ai poly16x8x2_t vtrnq_p16(poly16x8_t __a, poly16x8_t __b) { + poly16x8x2_t r; __builtin_arm64_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } + +__ai int8x8_t vtrn1_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vtrn1_v(__a, __b, 0); } +__ai int16x4_t vtrn1_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vtrn1_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vtrn1_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vtrn1_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vtrn1_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtrn1_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vtrn1_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vtrn1_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vtrn1_u32(uint32x2_t __a, uint32x2_t __b) { + return 
(uint32x2_t)__builtin_arm64_vtrn1_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vtrn1_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vtrn1_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vtrn1q_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vtrn1q_v(__a, __b, 32); } +__ai int16x8_t vtrn1q_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vtrn1q_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vtrn1q_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vtrn1q_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vtrn1q_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vtrn1q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vtrn1q_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai float32x4_t vtrn1q_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vtrn1q_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vtrn1q_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x8_t vtrn2_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vtrn2_v(__a, __b, 0); } +__ai int16x4_t vtrn2_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vtrn2_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vtrn2_s32(int32x2_t __a, int32x2_t __b) { + return 
(int32x2_t)__builtin_arm64_vtrn2_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vtrn2_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtrn2_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vtrn2_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vtrn2_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vtrn2_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vtrn2_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vtrn2_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vtrn2_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vtrn2q_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vtrn2q_v(__a, __b, 32); } +__ai int16x8_t vtrn2q_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vtrn2q_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vtrn2q_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vtrn2q_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vtrn2q_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vtrn2q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vtrn2q_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai float32x4_t vtrn2q_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vtrn2q_f64(float64x2_t __a, float64x2_t __b) { + 
return (float64x2_t)__builtin_arm64_vtrn2q_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai uint8x8_t vtst_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtst_v(__a, __b, 16); } +__ai uint16x4_t vtst_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vtst_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint8x8_t vtst_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vtst_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vtst_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint8x8_t vtst_p8(poly8x8_t __a, poly8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint8x16_t vtstq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vtstq_v(__a, __b, 48); } +__ai uint16x8_t vtstq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vtstq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vtstq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai uint8x16_t vtstq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vtstq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vtstq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vtstq_v((int8x16_t)__a, 
(int8x16_t)__b, 50); } +__ai uint64x2_t vtstq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai uint8x16_t vtstq_p8(poly8x16_t __a, poly8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); } + +__ai int64_t vtstd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_arm64_vtstd_s64(__a, __b); } +__ai uint64_t vtstd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_arm64_vtstd_u64(__a, __b); } + +__ai int8x8_t vusqadd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vusqadd_v(__a, __b, 0); } +__ai int16x4_t vusqadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vusqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vusqadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vusqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vusqadd_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_arm64_vusqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vusqadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vusqadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vusqadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vusqadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vusqadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vusqadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vusqadd_u64(uint64x1_t __a, uint64x1_t __b) { + return (uint64x1_t)__builtin_arm64_vusqadd_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int8x16_t vusqaddq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vusqaddq_v(__a, __b, 32); } +__ai int16x8_t vusqaddq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vusqaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vusqaddq_s32(int32x4_t __a, int32x4_t __b) { + 
return (int32x4_t)__builtin_arm64_vusqaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vusqaddq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vusqaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vusqaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vusqaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vusqaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vusqaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vusqaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vusqaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vusqaddq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vusqaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int8x8x2_t vuzp_s8(int8x8_t __a, int8x8_t __b) { + int8x8x2_t r; __builtin_arm64_vuzp_v(&r, __a, __b, 0); return r; } +__ai int16x4x2_t vuzp_s16(int16x4_t __a, int16x4_t __b) { + int16x4x2_t r; __builtin_arm64_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } +__ai int32x2x2_t vuzp_s32(int32x2_t __a, int32x2_t __b) { + int32x2x2_t r; __builtin_arm64_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } +__ai uint8x8x2_t vuzp_u8(uint8x8_t __a, uint8x8_t __b) { + uint8x8x2_t r; __builtin_arm64_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } +__ai uint16x4x2_t vuzp_u16(uint16x4_t __a, uint16x4_t __b) { + uint16x4x2_t r; __builtin_arm64_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } +__ai uint32x2x2_t vuzp_u32(uint32x2_t __a, uint32x2_t __b) { + uint32x2x2_t r; __builtin_arm64_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; } +__ai float32x2x2_t vuzp_f32(float32x2_t __a, float32x2_t __b) { + float32x2x2_t r; __builtin_arm64_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } +__ai poly8x8x2_t vuzp_p8(poly8x8_t __a, poly8x8_t __b) { + poly8x8x2_t r; __builtin_arm64_vuzp_v(&r, (int8x8_t)__a, 
(int8x8_t)__b, 4); return r; } +__ai poly16x4x2_t vuzp_p16(poly16x4_t __a, poly16x4_t __b) { + poly16x4x2_t r; __builtin_arm64_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } +__ai int8x16x2_t vuzpq_s8(int8x16_t __a, int8x16_t __b) { + int8x16x2_t r; __builtin_arm64_vuzpq_v(&r, __a, __b, 32); return r; } +__ai int16x8x2_t vuzpq_s16(int16x8_t __a, int16x8_t __b) { + int16x8x2_t r; __builtin_arm64_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } +__ai int32x4x2_t vuzpq_s32(int32x4_t __a, int32x4_t __b) { + int32x4x2_t r; __builtin_arm64_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } +__ai uint8x16x2_t vuzpq_u8(uint8x16_t __a, uint8x16_t __b) { + uint8x16x2_t r; __builtin_arm64_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } +__ai uint16x8x2_t vuzpq_u16(uint16x8_t __a, uint16x8_t __b) { + uint16x8x2_t r; __builtin_arm64_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } +__ai uint32x4x2_t vuzpq_u32(uint32x4_t __a, uint32x4_t __b) { + uint32x4x2_t r; __builtin_arm64_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; } +__ai float32x4x2_t vuzpq_f32(float32x4_t __a, float32x4_t __b) { + float32x4x2_t r; __builtin_arm64_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } +__ai poly8x16x2_t vuzpq_p8(poly8x16_t __a, poly8x16_t __b) { + poly8x16x2_t r; __builtin_arm64_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } +__ai poly16x8x2_t vuzpq_p16(poly16x8_t __a, poly16x8_t __b) { + poly16x8x2_t r; __builtin_arm64_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } + +__ai int8x8_t vuzp1_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vuzp1_v(__a, __b, 0); } +__ai int16x4_t vuzp1_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vuzp1_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vuzp1_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vuzp1_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vuzp1_u8(uint8x8_t 
__a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vuzp1_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vuzp1_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vuzp1_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vuzp1_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vuzp1_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vuzp1_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vuzp1_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vuzp1q_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vuzp1q_v(__a, __b, 32); } +__ai int16x8_t vuzp1q_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vuzp1q_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vuzp1q_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vuzp1q_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vuzp1q_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vuzp1q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vuzp1q_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai float32x4_t vuzp1q_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vuzp1q_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vuzp1q_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x8_t 
vuzp2_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vuzp2_v(__a, __b, 0); } +__ai int16x4_t vuzp2_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vuzp2_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vuzp2_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vuzp2_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vuzp2_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vuzp2_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vuzp2_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vuzp2_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vuzp2_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vuzp2_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vuzp2_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vuzp2_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vuzp2q_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vuzp2q_v(__a, __b, 32); } +__ai int16x8_t vuzp2q_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vuzp2q_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vuzp2q_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vuzp2q_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vuzp2q_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vuzp2q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vuzp2q_u64(uint64x2_t __a, uint64x2_t __b) { + 
return (uint64x2_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai float32x4_t vuzp2q_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vuzp2q_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vuzp2q_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int16x8_t vxtl_high_s8(int8x16_t __a) { + return vmovl_s8(vget_high_s8(__a)); } +__ai int32x4_t vxtl_high_s16(int16x8_t __a) { + return vmovl_s16(vget_high_s16(__a)); } +__ai int64x2_t vxtl_high_s32(int32x4_t __a) { + return vmovl_s32(vget_high_s32(__a)); } +__ai uint16x8_t vxtl_high_u8(uint8x16_t __a) { + return vmovl_u8(vget_high_u8(__a)); } +__ai uint32x4_t vxtl_high_u16(uint16x8_t __a) { + return vmovl_u16(vget_high_u16(__a)); } +__ai uint64x2_t vxtl_high_u32(uint32x4_t __a) { + return vmovl_u32(vget_high_u32(__a)); } + +__ai int8x8x2_t vzip_s8(int8x8_t __a, int8x8_t __b) { + int8x8x2_t r; __builtin_arm64_vzip_v(&r, __a, __b, 0); return r; } +__ai int16x4x2_t vzip_s16(int16x4_t __a, int16x4_t __b) { + int16x4x2_t r; __builtin_arm64_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } +__ai int32x2x2_t vzip_s32(int32x2_t __a, int32x2_t __b) { + int32x2x2_t r; __builtin_arm64_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } +__ai uint8x8x2_t vzip_u8(uint8x8_t __a, uint8x8_t __b) { + uint8x8x2_t r; __builtin_arm64_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } +__ai uint16x4x2_t vzip_u16(uint16x4_t __a, uint16x4_t __b) { + uint16x4x2_t r; __builtin_arm64_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } +__ai uint32x2x2_t vzip_u32(uint32x2_t __a, uint32x2_t __b) { + uint32x2x2_t r; __builtin_arm64_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; } +__ai float32x2x2_t vzip_f32(float32x2_t __a, float32x2_t __b) { + float32x2x2_t r; __builtin_arm64_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } +__ai poly8x8x2_t vzip_p8(poly8x8_t __a, 
poly8x8_t __b) { + poly8x8x2_t r; __builtin_arm64_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } +__ai poly16x4x2_t vzip_p16(poly16x4_t __a, poly16x4_t __b) { + poly16x4x2_t r; __builtin_arm64_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } +__ai int8x16x2_t vzipq_s8(int8x16_t __a, int8x16_t __b) { + int8x16x2_t r; __builtin_arm64_vzipq_v(&r, __a, __b, 32); return r; } +__ai int16x8x2_t vzipq_s16(int16x8_t __a, int16x8_t __b) { + int16x8x2_t r; __builtin_arm64_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } +__ai int32x4x2_t vzipq_s32(int32x4_t __a, int32x4_t __b) { + int32x4x2_t r; __builtin_arm64_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } +__ai uint8x16x2_t vzipq_u8(uint8x16_t __a, uint8x16_t __b) { + uint8x16x2_t r; __builtin_arm64_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } +__ai uint16x8x2_t vzipq_u16(uint16x8_t __a, uint16x8_t __b) { + uint16x8x2_t r; __builtin_arm64_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } +__ai uint32x4x2_t vzipq_u32(uint32x4_t __a, uint32x4_t __b) { + uint32x4x2_t r; __builtin_arm64_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; } +__ai float32x4x2_t vzipq_f32(float32x4_t __a, float32x4_t __b) { + float32x4x2_t r; __builtin_arm64_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } +__ai poly8x16x2_t vzipq_p8(poly8x16_t __a, poly8x16_t __b) { + poly8x16x2_t r; __builtin_arm64_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } +__ai poly16x8x2_t vzipq_p16(poly16x8_t __a, poly16x8_t __b) { + poly16x8x2_t r; __builtin_arm64_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } + +__ai int8x8_t vzip1_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vzip1_v(__a, __b, 0); } +__ai int16x4_t vzip1_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vzip1_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vzip1_s32(int32x2_t __a, int32x2_t __b) { + return 
(int32x2_t)__builtin_arm64_vzip1_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vzip1_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vzip1_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vzip1_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vzip1_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vzip1_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vzip1_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vzip1_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vzip1_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vzip1q_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vzip1q_v(__a, __b, 32); } +__ai int16x8_t vzip1q_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vzip1q_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vzip1q_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vzip1q_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vzip1q_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vzip1q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vzip1q_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai float32x4_t vzip1q_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vzip1q_f64(float64x2_t __a, float64x2_t __b) { + 
return (float64x2_t)__builtin_arm64_vzip1q_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x8_t vzip2_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_arm64_vzip2_v(__a, __b, 0); } +__ai int16x4_t vzip2_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_arm64_vzip2_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vzip2_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_arm64_vzip2_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vzip2_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_arm64_vzip2_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vzip2_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_arm64_vzip2_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vzip2_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_arm64_vzip2_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vzip2_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_arm64_vzip2_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vzip2q_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_arm64_vzip2q_v(__a, __b, 32); } +__ai int16x8_t vzip2q_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vzip2q_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vzip2q_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vzip2q_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vzip2q_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vzip2q_u32(uint32x4_t __a, uint32x4_t __b) { + return 
(uint32x4_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vzip2q_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai float32x4_t vzip2q_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vzip2q_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_arm64_vzip2q_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +#undef __ai + +#endif /* not __arm64 Darwin*/ + +#endif /* __AARCH64_SIMD_H */ diff --git a/python/clang/5.1/include/altivec.h b/python/clang/5.1/include/altivec.h new file mode 100644 index 00000000..74ce08aa --- /dev/null +++ b/python/clang/5.1/include/altivec.h @@ -0,0 +1,11856 @@ +/*===---- altivec.h - Standard header for AltiVec built-ins ---------------===*\ + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * +\*===----------------------------------------------------------------------===*/ + +#ifndef __ALTIVEC_H +#define __ALTIVEC_H + +#ifndef __ALTIVEC__ +#error "AltiVec support not enabled" +#endif + +/* constants for mapping CR6 bits to predicate result. */ + +#define __CR6_EQ 0 +#define __CR6_EQ_REV 1 +#define __CR6_LT 2 +#define __CR6_LT_REV 3 + +#define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__)) + +static vector signed char __ATTRS_o_ai +vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c); + +static vector unsigned char __ATTRS_o_ai +vec_perm(vector unsigned char __a, + vector unsigned char __b, + vector unsigned char __c); + +static vector bool char __ATTRS_o_ai +vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c); + +static vector short __ATTRS_o_ai +vec_perm(vector short __a, vector short __b, vector unsigned char __c); + +static vector unsigned short __ATTRS_o_ai +vec_perm(vector unsigned short __a, + vector unsigned short __b, + vector unsigned char __c); + +static vector bool short __ATTRS_o_ai +vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c); + +static vector pixel __ATTRS_o_ai +vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c); + +static vector int __ATTRS_o_ai +vec_perm(vector int __a, vector int __b, vector unsigned char __c); + +static vector unsigned int __ATTRS_o_ai +vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c); + +static vector bool int __ATTRS_o_ai +vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c); + +static vector float __ATTRS_o_ai +vec_perm(vector float __a, vector float __b, vector unsigned char __c); + +/* vec_abs */ + +#define __builtin_altivec_abs_v16qi vec_abs +#define __builtin_altivec_abs_v8hi vec_abs +#define __builtin_altivec_abs_v4si vec_abs + +static vector signed char __ATTRS_o_ai +vec_abs(vector signed char __a) +{ + return 
__builtin_altivec_vmaxsb(__a, -__a); +} + +static vector signed short __ATTRS_o_ai +vec_abs(vector signed short __a) +{ + return __builtin_altivec_vmaxsh(__a, -__a); +} + +static vector signed int __ATTRS_o_ai +vec_abs(vector signed int __a) +{ + return __builtin_altivec_vmaxsw(__a, -__a); +} + +static vector float __ATTRS_o_ai +vec_abs(vector float __a) +{ + vector unsigned int __res = (vector unsigned int)__a + & (vector unsigned int)(0x7FFFFFFF); + return (vector float)__res; +} + +/* vec_abss */ + +#define __builtin_altivec_abss_v16qi vec_abss +#define __builtin_altivec_abss_v8hi vec_abss +#define __builtin_altivec_abss_v4si vec_abss + +static vector signed char __ATTRS_o_ai +vec_abss(vector signed char __a) +{ + return __builtin_altivec_vmaxsb + (__a, __builtin_altivec_vsubsbs((vector signed char)(0), __a)); +} + +static vector signed short __ATTRS_o_ai +vec_abss(vector signed short __a) +{ + return __builtin_altivec_vmaxsh + (__a, __builtin_altivec_vsubshs((vector signed short)(0), __a)); +} + +static vector signed int __ATTRS_o_ai +vec_abss(vector signed int __a) +{ + return __builtin_altivec_vmaxsw + (__a, __builtin_altivec_vsubsws((vector signed int)(0), __a)); +} + +/* vec_add */ + +static vector signed char __ATTRS_o_ai +vec_add(vector signed char __a, vector signed char __b) +{ + return __a + __b; +} + +static vector signed char __ATTRS_o_ai +vec_add(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a + __b; +} + +static vector signed char __ATTRS_o_ai +vec_add(vector signed char __a, vector bool char __b) +{ + return __a + (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_add(vector unsigned char __a, vector unsigned char __b) +{ + return __a + __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_add(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a + __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_add(vector unsigned char __a, vector bool char 
__b) +{ + return __a + (vector unsigned char)__b; +} + +static vector short __ATTRS_o_ai +vec_add(vector short __a, vector short __b) +{ + return __a + __b; +} + +static vector short __ATTRS_o_ai +vec_add(vector bool short __a, vector short __b) +{ + return (vector short)__a + __b; +} + +static vector short __ATTRS_o_ai +vec_add(vector short __a, vector bool short __b) +{ + return __a + (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_add(vector unsigned short __a, vector unsigned short __b) +{ + return __a + __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_add(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a + __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_add(vector unsigned short __a, vector bool short __b) +{ + return __a + (vector unsigned short)__b; +} + +static vector int __ATTRS_o_ai +vec_add(vector int __a, vector int __b) +{ + return __a + __b; +} + +static vector int __ATTRS_o_ai +vec_add(vector bool int __a, vector int __b) +{ + return (vector int)__a + __b; +} + +static vector int __ATTRS_o_ai +vec_add(vector int __a, vector bool int __b) +{ + return __a + (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_add(vector unsigned int __a, vector unsigned int __b) +{ + return __a + __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_add(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a + __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_add(vector unsigned int __a, vector bool int __b) +{ + return __a + (vector unsigned int)__b; +} + +static vector float __ATTRS_o_ai +vec_add(vector float __a, vector float __b) +{ + return __a + __b; +} + +/* vec_vaddubm */ + +#define __builtin_altivec_vaddubm vec_vaddubm + +static vector signed char __ATTRS_o_ai +vec_vaddubm(vector signed char __a, vector signed char __b) +{ + return __a + __b; +} + +static vector signed char __ATTRS_o_ai +vec_vaddubm(vector bool char __a, vector 
signed char __b) +{ + return (vector signed char)__a + __b; +} + +static vector signed char __ATTRS_o_ai +vec_vaddubm(vector signed char __a, vector bool char __b) +{ + return __a + (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vaddubm(vector unsigned char __a, vector unsigned char __b) +{ + return __a + __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vaddubm(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a + __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vaddubm(vector unsigned char __a, vector bool char __b) +{ + return __a + (vector unsigned char)__b; +} + +/* vec_vadduhm */ + +#define __builtin_altivec_vadduhm vec_vadduhm + +static vector short __ATTRS_o_ai +vec_vadduhm(vector short __a, vector short __b) +{ + return __a + __b; +} + +static vector short __ATTRS_o_ai +vec_vadduhm(vector bool short __a, vector short __b) +{ + return (vector short)__a + __b; +} + +static vector short __ATTRS_o_ai +vec_vadduhm(vector short __a, vector bool short __b) +{ + return __a + (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vadduhm(vector unsigned short __a, vector unsigned short __b) +{ + return __a + __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vadduhm(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a + __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vadduhm(vector unsigned short __a, vector bool short __b) +{ + return __a + (vector unsigned short)__b; +} + +/* vec_vadduwm */ + +#define __builtin_altivec_vadduwm vec_vadduwm + +static vector int __ATTRS_o_ai +vec_vadduwm(vector int __a, vector int __b) +{ + return __a + __b; +} + +static vector int __ATTRS_o_ai +vec_vadduwm(vector bool int __a, vector int __b) +{ + return (vector int)__a + __b; +} + +static vector int __ATTRS_o_ai +vec_vadduwm(vector int __a, vector bool int __b) +{ + return __a + (vector int)__b; +} + +static vector unsigned int 
__ATTRS_o_ai +vec_vadduwm(vector unsigned int __a, vector unsigned int __b) +{ + return __a + __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vadduwm(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a + __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vadduwm(vector unsigned int __a, vector bool int __b) +{ + return __a + (vector unsigned int)__b; +} + +/* vec_vaddfp */ + +#define __builtin_altivec_vaddfp vec_vaddfp + +static vector float __attribute__((__always_inline__)) +vec_vaddfp(vector float __a, vector float __b) +{ + return __a + __b; +} + +/* vec_addc */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_addc(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vaddcuw(__a, __b); +} + +/* vec_vaddcuw */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vaddcuw(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vaddcuw(__a, __b); +} + +/* vec_adds */ + +static vector signed char __ATTRS_o_ai +vec_adds(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vaddsbs(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_adds(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vaddsbs((vector signed char)__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_adds(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vaddsbs(__a, (vector signed char)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_adds(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vaddubs(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_adds(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vaddubs((vector unsigned char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_adds(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vaddubs(__a, (vector 
unsigned char)__b); +} + +static vector short __ATTRS_o_ai +vec_adds(vector short __a, vector short __b) +{ + return __builtin_altivec_vaddshs(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_adds(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vaddshs((vector short)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_adds(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vaddshs(__a, (vector short)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_adds(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vadduhs(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_adds(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vadduhs((vector unsigned short)__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_adds(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vadduhs(__a, (vector unsigned short)__b); +} + +static vector int __ATTRS_o_ai +vec_adds(vector int __a, vector int __b) +{ + return __builtin_altivec_vaddsws(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_adds(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vaddsws((vector int)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_adds(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vaddsws(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_adds(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vadduws(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_adds(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vadduws((vector unsigned int)__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_adds(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vadduws(__a, (vector unsigned int)__b); +} + +/* vec_vaddsbs */ + +static vector signed char __ATTRS_o_ai +vec_vaddsbs(vector signed char 
__a, vector signed char __b) +{ + return __builtin_altivec_vaddsbs(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_vaddsbs(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vaddsbs((vector signed char)__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_vaddsbs(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vaddsbs(__a, (vector signed char)__b); +} + +/* vec_vaddubs */ + +static vector unsigned char __ATTRS_o_ai +vec_vaddubs(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vaddubs(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vaddubs(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vaddubs((vector unsigned char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vaddubs(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vaddubs(__a, (vector unsigned char)__b); +} + +/* vec_vaddshs */ + +static vector short __ATTRS_o_ai +vec_vaddshs(vector short __a, vector short __b) +{ + return __builtin_altivec_vaddshs(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_vaddshs(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vaddshs((vector short)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_vaddshs(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vaddshs(__a, (vector short)__b); +} + +/* vec_vadduhs */ + +static vector unsigned short __ATTRS_o_ai +vec_vadduhs(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vadduhs(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vadduhs(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vadduhs((vector unsigned short)__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vadduhs(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vadduhs(__a, (vector unsigned short)__b); +} + +/* 
vec_vaddsws */ + +static vector int __ATTRS_o_ai +vec_vaddsws(vector int __a, vector int __b) +{ + return __builtin_altivec_vaddsws(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_vaddsws(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vaddsws((vector int)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_vaddsws(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vaddsws(__a, (vector int)__b); +} + +/* vec_vadduws */ + +static vector unsigned int __ATTRS_o_ai +vec_vadduws(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vadduws(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vadduws(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vadduws((vector unsigned int)__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vadduws(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vadduws(__a, (vector unsigned int)__b); +} + +/* vec_and */ + +#define __builtin_altivec_vand vec_and + +static vector signed char __ATTRS_o_ai +vec_and(vector signed char __a, vector signed char __b) +{ + return __a & __b; +} + +static vector signed char __ATTRS_o_ai +vec_and(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a & __b; +} + +static vector signed char __ATTRS_o_ai +vec_and(vector signed char __a, vector bool char __b) +{ + return __a & (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_and(vector unsigned char __a, vector unsigned char __b) +{ + return __a & __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_and(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a & __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_and(vector unsigned char __a, vector bool char __b) +{ + return __a & (vector unsigned char)__b; +} + +static vector bool char __ATTRS_o_ai +vec_and(vector bool char __a, vector bool char __b) +{ + return __a & __b; +} + +static 
vector short __ATTRS_o_ai +vec_and(vector short __a, vector short __b) +{ + return __a & __b; +} + +static vector short __ATTRS_o_ai +vec_and(vector bool short __a, vector short __b) +{ + return (vector short)__a & __b; +} + +static vector short __ATTRS_o_ai +vec_and(vector short __a, vector bool short __b) +{ + return __a & (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_and(vector unsigned short __a, vector unsigned short __b) +{ + return __a & __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_and(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a & __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_and(vector unsigned short __a, vector bool short __b) +{ + return __a & (vector unsigned short)__b; +} + +static vector bool short __ATTRS_o_ai +vec_and(vector bool short __a, vector bool short __b) +{ + return __a & __b; +} + +static vector int __ATTRS_o_ai +vec_and(vector int __a, vector int __b) +{ + return __a & __b; +} + +static vector int __ATTRS_o_ai +vec_and(vector bool int __a, vector int __b) +{ + return (vector int)__a & __b; +} + +static vector int __ATTRS_o_ai +vec_and(vector int __a, vector bool int __b) +{ + return __a & (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_and(vector unsigned int __a, vector unsigned int __b) +{ + return __a & __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_and(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a & __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_and(vector unsigned int __a, vector bool int __b) +{ + return __a & (vector unsigned int)__b; +} + +static vector bool int __ATTRS_o_ai +vec_and(vector bool int __a, vector bool int __b) +{ + return __a & __b; +} + +static vector float __ATTRS_o_ai +vec_and(vector float __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; + return (vector float)__res; +} + +static vector 
float __ATTRS_o_ai +vec_and(vector bool int __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_and(vector float __a, vector bool int __b) +{ + vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; + return (vector float)__res; +} + +/* vec_vand */ + +static vector signed char __ATTRS_o_ai +vec_vand(vector signed char __a, vector signed char __b) +{ + return __a & __b; +} + +static vector signed char __ATTRS_o_ai +vec_vand(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a & __b; +} + +static vector signed char __ATTRS_o_ai +vec_vand(vector signed char __a, vector bool char __b) +{ + return __a & (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vand(vector unsigned char __a, vector unsigned char __b) +{ + return __a & __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vand(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a & __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vand(vector unsigned char __a, vector bool char __b) +{ + return __a & (vector unsigned char)__b; +} + +static vector bool char __ATTRS_o_ai +vec_vand(vector bool char __a, vector bool char __b) +{ + return __a & __b; +} + +static vector short __ATTRS_o_ai +vec_vand(vector short __a, vector short __b) +{ + return __a & __b; +} + +static vector short __ATTRS_o_ai +vec_vand(vector bool short __a, vector short __b) +{ + return (vector short)__a & __b; +} + +static vector short __ATTRS_o_ai +vec_vand(vector short __a, vector bool short __b) +{ + return __a & (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vand(vector unsigned short __a, vector unsigned short __b) +{ + return __a & __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vand(vector bool short __a, vector unsigned short __b) +{ + return (vector 
unsigned short)__a & __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vand(vector unsigned short __a, vector bool short __b) +{ + return __a & (vector unsigned short)__b; +} + +static vector bool short __ATTRS_o_ai +vec_vand(vector bool short __a, vector bool short __b) +{ + return __a & __b; +} + +static vector int __ATTRS_o_ai +vec_vand(vector int __a, vector int __b) +{ + return __a & __b; +} + +static vector int __ATTRS_o_ai +vec_vand(vector bool int __a, vector int __b) +{ + return (vector int)__a & __b; +} + +static vector int __ATTRS_o_ai +vec_vand(vector int __a, vector bool int __b) +{ + return __a & (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vand(vector unsigned int __a, vector unsigned int __b) +{ + return __a & __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vand(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a & __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vand(vector unsigned int __a, vector bool int __b) +{ + return __a & (vector unsigned int)__b; +} + +static vector bool int __ATTRS_o_ai +vec_vand(vector bool int __a, vector bool int __b) +{ + return __a & __b; +} + +static vector float __ATTRS_o_ai +vec_vand(vector float __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vand(vector bool int __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vand(vector float __a, vector bool int __b) +{ + vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b; + return (vector float)__res; +} + +/* vec_andc */ + +#define __builtin_altivec_vandc vec_andc + +static vector signed char __ATTRS_o_ai +vec_andc(vector signed char __a, vector signed char __b) +{ + return __a & ~__b; +} + +static vector 
signed char __ATTRS_o_ai +vec_andc(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a & ~__b; +} + +static vector signed char __ATTRS_o_ai +vec_andc(vector signed char __a, vector bool char __b) +{ + return __a & ~(vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_andc(vector unsigned char __a, vector unsigned char __b) +{ + return __a & ~__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_andc(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a & ~__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_andc(vector unsigned char __a, vector bool char __b) +{ + return __a & ~(vector unsigned char)__b; +} + +static vector bool char __ATTRS_o_ai +vec_andc(vector bool char __a, vector bool char __b) +{ + return __a & ~__b; +} + +static vector short __ATTRS_o_ai +vec_andc(vector short __a, vector short __b) +{ + return __a & ~__b; +} + +static vector short __ATTRS_o_ai +vec_andc(vector bool short __a, vector short __b) +{ + return (vector short)__a & ~__b; +} + +static vector short __ATTRS_o_ai +vec_andc(vector short __a, vector bool short __b) +{ + return __a & ~(vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_andc(vector unsigned short __a, vector unsigned short __b) +{ + return __a & ~__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_andc(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a & ~__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_andc(vector unsigned short __a, vector bool short __b) +{ + return __a & ~(vector unsigned short)__b; +} + +static vector bool short __ATTRS_o_ai +vec_andc(vector bool short __a, vector bool short __b) +{ + return __a & ~__b; +} + +static vector int __ATTRS_o_ai +vec_andc(vector int __a, vector int __b) +{ + return __a & ~__b; +} + +static vector int __ATTRS_o_ai +vec_andc(vector bool int __a, vector int __b) +{ + return (vector int)__a & ~__b; +} + +static 
vector int __ATTRS_o_ai +vec_andc(vector int __a, vector bool int __b) +{ + return __a & ~(vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_andc(vector unsigned int __a, vector unsigned int __b) +{ + return __a & ~__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_andc(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a & ~__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_andc(vector unsigned int __a, vector bool int __b) +{ + return __a & ~(vector unsigned int)__b; +} + +static vector bool int __ATTRS_o_ai +vec_andc(vector bool int __a, vector bool int __b) +{ + return __a & ~__b; +} + +static vector float __ATTRS_o_ai +vec_andc(vector float __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_andc(vector bool int __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_andc(vector float __a, vector bool int __b) +{ + vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; + return (vector float)__res; +} + +/* vec_vandc: each overload returns __a & ~__b; a vector bool operand mixed with a non-bool one is cast to the other operand's type, and the overloads returning vector float do the bitwise work in vector unsigned int and cast the result back */ + +static vector signed char __ATTRS_o_ai +vec_vandc(vector signed char __a, vector signed char __b) +{ + return __a & ~__b; +} + +static vector signed char __ATTRS_o_ai +vec_vandc(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a & ~__b; +} + +static vector signed char __ATTRS_o_ai +vec_vandc(vector signed char __a, vector bool char __b) +{ + return __a & ~(vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vandc(vector unsigned char __a, vector unsigned char __b) +{ + return __a & ~__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vandc(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a & ~__b; +} + +static
vector unsigned char __ATTRS_o_ai +vec_vandc(vector unsigned char __a, vector bool char __b) +{ + return __a & ~(vector unsigned char)__b; +} + +static vector bool char __ATTRS_o_ai +vec_vandc(vector bool char __a, vector bool char __b) +{ + return __a & ~__b; +} + +static vector short __ATTRS_o_ai +vec_vandc(vector short __a, vector short __b) +{ + return __a & ~__b; +} + +static vector short __ATTRS_o_ai +vec_vandc(vector bool short __a, vector short __b) +{ + return (vector short)__a & ~__b; +} + +static vector short __ATTRS_o_ai +vec_vandc(vector short __a, vector bool short __b) +{ + return __a & ~(vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vandc(vector unsigned short __a, vector unsigned short __b) +{ + return __a & ~__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vandc(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a & ~__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vandc(vector unsigned short __a, vector bool short __b) +{ + return __a & ~(vector unsigned short)__b; +} + +static vector bool short __ATTRS_o_ai +vec_vandc(vector bool short __a, vector bool short __b) +{ + return __a & ~__b; +} + +static vector int __ATTRS_o_ai +vec_vandc(vector int __a, vector int __b) +{ + return __a & ~__b; +} + +static vector int __ATTRS_o_ai +vec_vandc(vector bool int __a, vector int __b) +{ + return (vector int)__a & ~__b; +} + +static vector int __ATTRS_o_ai +vec_vandc(vector int __a, vector bool int __b) +{ + return __a & ~(vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vandc(vector unsigned int __a, vector unsigned int __b) +{ + return __a & ~__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vandc(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a & ~__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vandc(vector unsigned int __a, vector bool int __b) +{ + return __a & ~(vector unsigned int)__b; +} + +static vector
bool int __ATTRS_o_ai +vec_vandc(vector bool int __a, vector bool int __b) +{ + return __a & ~__b; +} + +static vector float __ATTRS_o_ai +vec_vandc(vector float __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vandc(vector bool int __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vandc(vector float __a, vector bool int __b) +{ + vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b; + return (vector float)__res; +} + +/* vec_avg: each overload forwards to the __builtin_altivec_vavg* builtin for its operand type (sb, ub, sh, uh, sw, uw) */ + +static vector signed char __ATTRS_o_ai +vec_avg(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vavgsb(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_avg(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vavgub(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_avg(vector short __a, vector short __b) +{ + return __builtin_altivec_vavgsh(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_avg(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vavguh(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_avg(vector int __a, vector int __b) +{ + return __builtin_altivec_vavgsw(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_avg(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vavguw(__a, __b); +} + +/* vec_vavgsb */ + +static vector signed char __attribute__((__always_inline__)) +vec_vavgsb(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vavgsb(__a, __b); +} + +/* vec_vavgub */ + +static vector unsigned char __attribute__((__always_inline__)) +vec_vavgub(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vavgub(__a,
__b); +} + +/* vec_vavgsh */ + +static vector short __attribute__((__always_inline__)) +vec_vavgsh(vector short __a, vector short __b) +{ + return __builtin_altivec_vavgsh(__a, __b); +} + +/* vec_vavguh */ + +static vector unsigned short __attribute__((__always_inline__)) +vec_vavguh(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vavguh(__a, __b); +} + +/* vec_vavgsw */ + +static vector int __attribute__((__always_inline__)) +vec_vavgsw(vector int __a, vector int __b) +{ + return __builtin_altivec_vavgsw(__a, __b); +} + +/* vec_vavguw */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vavguw(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vavguw(__a, __b); +} + +/* vec_ceil: calls the same builtin as vec_vrfip below */ + +static vector float __attribute__((__always_inline__)) +vec_ceil(vector float __a) +{ + return __builtin_altivec_vrfip(__a); +} + +/* vec_vrfip */ + +static vector float __attribute__((__always_inline__)) +vec_vrfip(vector float __a) +{ + return __builtin_altivec_vrfip(__a); +} + +/* vec_cmpb: calls the same builtin as vec_vcmpbfp below */ + +static vector int __attribute__((__always_inline__)) +vec_cmpb(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpbfp(__a, __b); +} + +/* vec_vcmpbfp */ + +static vector int __attribute__((__always_inline__)) +vec_vcmpbfp(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpbfp(__a, __b); +} + +/* vec_cmpeq: each overload casts the builtin's result to its declared vector bool return type; operands are cast where the overload's type differs from the type the builtin is called with */ + +static vector bool char __ATTRS_o_ai +vec_cmpeq(vector signed char __a, vector signed char __b) +{ + return (vector bool char) + __builtin_altivec_vcmpequb((vector char)__a, (vector char)__b); +} + +static vector bool char __ATTRS_o_ai +vec_cmpeq(vector unsigned char __a, vector unsigned char __b) +{ + return (vector bool char) + __builtin_altivec_vcmpequb((vector char)__a, (vector char)__b); +} + +static vector bool short __ATTRS_o_ai +vec_cmpeq(vector short __a, vector short __b) +{ + return (vector bool short)__builtin_altivec_vcmpequh(__a, __b); +} + +static
vector bool short __ATTRS_o_ai +vec_cmpeq(vector unsigned short __a, vector unsigned short __b) +{ + return (vector bool short) + __builtin_altivec_vcmpequh((vector short)__a, (vector short)__b); +} + +static vector bool int __ATTRS_o_ai +vec_cmpeq(vector int __a, vector int __b) +{ + return (vector bool int)__builtin_altivec_vcmpequw(__a, __b); +} + +static vector bool int __ATTRS_o_ai +vec_cmpeq(vector unsigned int __a, vector unsigned int __b) +{ + return (vector bool int) + __builtin_altivec_vcmpequw((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_cmpeq(vector float __a, vector float __b) +{ + return (vector bool int)__builtin_altivec_vcmpeqfp(__a, __b); +} + +/* vec_cmpge: calls the same builtin as vec_vcmpgefp below */ + +static vector bool int __attribute__((__always_inline__)) +vec_cmpge(vector float __a, vector float __b) +{ + return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b); +} + +/* vec_vcmpgefp */ + +static vector bool int __attribute__((__always_inline__)) +vec_vcmpgefp(vector float __a, vector float __b) +{ + return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b); +} + +/* vec_cmpgt: each overload forwards to the __builtin_altivec_vcmpgt* builtin for its operand type and casts the result to its declared vector bool return type */ + +static vector bool char __ATTRS_o_ai +vec_cmpgt(vector signed char __a, vector signed char __b) +{ + return (vector bool char)__builtin_altivec_vcmpgtsb(__a, __b); +} + +static vector bool char __ATTRS_o_ai +vec_cmpgt(vector unsigned char __a, vector unsigned char __b) +{ + return (vector bool char)__builtin_altivec_vcmpgtub(__a, __b); +} + +static vector bool short __ATTRS_o_ai +vec_cmpgt(vector short __a, vector short __b) +{ + return (vector bool short)__builtin_altivec_vcmpgtsh(__a, __b); +} + +static vector bool short __ATTRS_o_ai +vec_cmpgt(vector unsigned short __a, vector unsigned short __b) +{ + return (vector bool short)__builtin_altivec_vcmpgtuh(__a, __b); +} + +static vector bool int __ATTRS_o_ai +vec_cmpgt(vector int __a, vector int __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtsw(__a, __b); +} + +static vector bool int __ATTRS_o_ai
+vec_cmpgt(vector unsigned int __a, vector unsigned int __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtuw(__a, __b); +} + +static vector bool int __ATTRS_o_ai +vec_cmpgt(vector float __a, vector float __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b); +} + +/* vec_vcmpgtsb */ + +static vector bool char __attribute__((__always_inline__)) +vec_vcmpgtsb(vector signed char __a, vector signed char __b) +{ + return (vector bool char)__builtin_altivec_vcmpgtsb(__a, __b); +} + +/* vec_vcmpgtub */ + +static vector bool char __attribute__((__always_inline__)) +vec_vcmpgtub(vector unsigned char __a, vector unsigned char __b) +{ + return (vector bool char)__builtin_altivec_vcmpgtub(__a, __b); +} + +/* vec_vcmpgtsh */ + +static vector bool short __attribute__((__always_inline__)) +vec_vcmpgtsh(vector short __a, vector short __b) +{ + return (vector bool short)__builtin_altivec_vcmpgtsh(__a, __b); +} + +/* vec_vcmpgtuh */ + +static vector bool short __attribute__((__always_inline__)) +vec_vcmpgtuh(vector unsigned short __a, vector unsigned short __b) +{ + return (vector bool short)__builtin_altivec_vcmpgtuh(__a, __b); +} + +/* vec_vcmpgtsw */ + +static vector bool int __attribute__((__always_inline__)) +vec_vcmpgtsw(vector int __a, vector int __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtsw(__a, __b); +} + +/* vec_vcmpgtuw */ + +static vector bool int __attribute__((__always_inline__)) +vec_vcmpgtuw(vector unsigned int __a, vector unsigned int __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtuw(__a, __b); +} + +/* vec_vcmpgtfp */ + +static vector bool int __attribute__((__always_inline__)) +vec_vcmpgtfp(vector float __a, vector float __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b); +} + +/* vec_cmple: calls vcmpgefp with the operands swapped (__b, __a) */ + +static vector bool int __attribute__((__always_inline__)) +vec_cmple(vector float __a, vector float __b) +{ + return (vector bool int)__builtin_altivec_vcmpgefp(__b, __a); +} + +/* vec_cmplt */ +
+static vector bool char __ATTRS_o_ai +vec_cmplt(vector signed char __a, vector signed char __b) +{ /* every vec_cmplt overload calls a vcmpgt* builtin with the operands swapped: (__b, __a) */ + return (vector bool char)__builtin_altivec_vcmpgtsb(__b, __a); +} + +static vector bool char __ATTRS_o_ai +vec_cmplt(vector unsigned char __a, vector unsigned char __b) +{ + return (vector bool char)__builtin_altivec_vcmpgtub(__b, __a); +} + +static vector bool short __ATTRS_o_ai +vec_cmplt(vector short __a, vector short __b) +{ + return (vector bool short)__builtin_altivec_vcmpgtsh(__b, __a); +} + +static vector bool short __ATTRS_o_ai +vec_cmplt(vector unsigned short __a, vector unsigned short __b) +{ + return (vector bool short)__builtin_altivec_vcmpgtuh(__b, __a); +} + +static vector bool int __ATTRS_o_ai +vec_cmplt(vector int __a, vector int __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtsw(__b, __a); +} + +static vector bool int __ATTRS_o_ai +vec_cmplt(vector unsigned int __a, vector unsigned int __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtuw(__b, __a); +} + +static vector bool int __ATTRS_o_ai +vec_cmplt(vector float __a, vector float __b) +{ + return (vector bool int)__builtin_altivec_vcmpgtfp(__b, __a); +} + +/* vec_ctf: vector int goes to vcfsx; vector unsigned int is cast to vector int and goes to vcfux */ + +static vector float __ATTRS_o_ai +vec_ctf(vector int __a, int __b) +{ + return __builtin_altivec_vcfsx(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_ctf(vector unsigned int __a, int __b) +{ + return __builtin_altivec_vcfux((vector int)__a, __b); +} + +/* vec_vcfsx */ + +static vector float __attribute__((__always_inline__)) +vec_vcfsx(vector int __a, int __b) +{ + return __builtin_altivec_vcfsx(__a, __b); +} + +/* vec_vcfux */ + +static vector float __attribute__((__always_inline__)) +vec_vcfux(vector unsigned int __a, int __b) +{ + return __builtin_altivec_vcfux((vector int)__a, __b); +} + +/* vec_cts: calls the same builtin as vec_vctsxs below */ + +static vector int __attribute__((__always_inline__)) +vec_cts(vector float __a, int __b) +{ + return __builtin_altivec_vctsxs(__a, __b); +} + +/* vec_vctsxs */ + +static vector int
__attribute__((__always_inline__)) +vec_vctsxs(vector float __a, int __b) +{ + return __builtin_altivec_vctsxs(__a, __b); +} + +/* vec_ctu: calls the same builtin as vec_vctuxs below */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_ctu(vector float __a, int __b) +{ + return __builtin_altivec_vctuxs(__a, __b); +} + +/* vec_vctuxs */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vctuxs(vector float __a, int __b) +{ + return __builtin_altivec_vctuxs(__a, __b); +} + +/* vec_dss */ + +static void __attribute__((__always_inline__)) +vec_dss(int __a) +{ + __builtin_altivec_dss(__a); +} + +/* vec_dssall */ + +static void __attribute__((__always_inline__)) +vec_dssall(void) +{ + __builtin_altivec_dssall(); +} + +/* vec_dst */ + +static void __attribute__((__always_inline__)) +vec_dst(const void *__a, int __b, int __c) +{ + __builtin_altivec_dst(__a, __b, __c); +} + +/* vec_dstst */ + +static void __attribute__((__always_inline__)) +vec_dstst(const void *__a, int __b, int __c) +{ + __builtin_altivec_dstst(__a, __b, __c); +} + +/* vec_dststt */ + +static void __attribute__((__always_inline__)) +vec_dststt(const void *__a, int __b, int __c) +{ + __builtin_altivec_dststt(__a, __b, __c); +} + +/* vec_dstt */ + +static void __attribute__((__always_inline__)) +vec_dstt(const void *__a, int __b, int __c) +{ + __builtin_altivec_dstt(__a, __b, __c); +} + +/* vec_expte: calls the same builtin as vec_vexptefp below */ + +static vector float __attribute__((__always_inline__)) +vec_expte(vector float __a) +{ + return __builtin_altivec_vexptefp(__a); +} + +/* vec_vexptefp */ + +static vector float __attribute__((__always_inline__)) +vec_vexptefp(vector float __a) +{ + return __builtin_altivec_vexptefp(__a); +} + +/* vec_floor: calls the same builtin as vec_vrfim below */ + +static vector float __attribute__((__always_inline__)) +vec_floor(vector float __a) +{ + return __builtin_altivec_vrfim(__a); +} + +/* vec_vrfim */ + +static vector float __attribute__((__always_inline__)) +vec_vrfim(vector float __a) +{ + return __builtin_altivec_vrfim(__a); +} + +/* vec_ld: every overload calls __builtin_altivec_lvx and casts the result to its declared return type */ +
+static vector signed char __ATTRS_o_ai +vec_ld(int __a, const vector signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvx(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_ld(int __a, const signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_ld(int __a, const vector unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_ld(int __a, const unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvx(__a, __b); +} + +static vector bool char __ATTRS_o_ai +vec_ld(int __a, const vector bool char *__b) +{ + return (vector bool char)__builtin_altivec_lvx(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_ld(int __a, const vector short *__b) +{ + return (vector short)__builtin_altivec_lvx(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_ld(int __a, const short *__b) +{ + return (vector short)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_ld(int __a, const vector unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_ld(int __a, const unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvx(__a, __b); +} + +static vector bool short __ATTRS_o_ai +vec_ld(int __a, const vector bool short *__b) +{ + return (vector bool short)__builtin_altivec_lvx(__a, __b); +} + +static vector pixel __ATTRS_o_ai +vec_ld(int __a, const vector pixel *__b) +{ + return (vector pixel)__builtin_altivec_lvx(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_ld(int __a, const vector int *__b) +{ + return (vector int)__builtin_altivec_lvx(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_ld(int __a, const int *__b) +{ + return (vector int)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_ld(int __a,
const vector unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_ld(int __a, const unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvx(__a, __b); +} + +static vector bool int __ATTRS_o_ai +vec_ld(int __a, const vector bool int *__b) +{ + return (vector bool int)__builtin_altivec_lvx(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_ld(int __a, const vector float *__b) +{ + return (vector float)__builtin_altivec_lvx(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_ld(int __a, const float *__b) +{ + return (vector float)__builtin_altivec_lvx(__a, __b); +} + +/* vec_lvx: same overload set and bodies as vec_ld (all call __builtin_altivec_lvx) */ + +static vector signed char __ATTRS_o_ai +vec_lvx(int __a, const vector signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvx(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_lvx(int __a, const signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvx(int __a, const vector unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvx(int __a, const unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvx(__a, __b); +} + +static vector bool char __ATTRS_o_ai +vec_lvx(int __a, const vector bool char *__b) +{ + return (vector bool char)__builtin_altivec_lvx(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_lvx(int __a, const vector short *__b) +{ + return (vector short)__builtin_altivec_lvx(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_lvx(int __a, const short *__b) +{ + return (vector short)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvx(int __a, const vector unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvx(int __a, const
unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvx(__a, __b); +} + +static vector bool short __ATTRS_o_ai +vec_lvx(int __a, const vector bool short *__b) +{ + return (vector bool short)__builtin_altivec_lvx(__a, __b); +} + +static vector pixel __ATTRS_o_ai +vec_lvx(int __a, const vector pixel *__b) +{ + return (vector pixel)__builtin_altivec_lvx(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_lvx(int __a, const vector int *__b) +{ + return (vector int)__builtin_altivec_lvx(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_lvx(int __a, const int *__b) +{ + return (vector int)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvx(int __a, const vector unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvx(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvx(int __a, const unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvx(__a, __b); +} + +static vector bool int __ATTRS_o_ai +vec_lvx(int __a, const vector bool int *__b) +{ + return (vector bool int)__builtin_altivec_lvx(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_lvx(int __a, const vector float *__b) +{ + return (vector float)__builtin_altivec_lvx(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_lvx(int __a, const float *__b) +{ + return (vector float)__builtin_altivec_lvx(__a, __b); +} + +/* vec_lde: char pointers use lvebx, short pointers use lvehx, int and float pointers use lvewx */ + +static vector signed char __ATTRS_o_ai +vec_lde(int __a, const signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvebx(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lde(int __a, const unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvebx(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_lde(int __a, const short *__b) +{ + return (vector short)__builtin_altivec_lvehx(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_lde(int __a, const unsigned short *__b) +{ + return (vector unsigned
short)__builtin_altivec_lvehx(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_lde(int __a, const int *__b) +{ + return (vector int)__builtin_altivec_lvewx(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_lde(int __a, const unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvewx(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_lde(int __a, const float *__b) +{ + return (vector float)__builtin_altivec_lvewx(__a, __b); +} + +/* vec_lvebx */ + +static vector signed char __ATTRS_o_ai +vec_lvebx(int __a, const signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvebx(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvebx(int __a, const unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvebx(__a, __b); +} + +/* vec_lvehx */ + +static vector short __ATTRS_o_ai +vec_lvehx(int __a, const short *__b) +{ + return (vector short)__builtin_altivec_lvehx(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvehx(int __a, const unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvehx(__a, __b); +} + +/* vec_lvewx */ + +static vector int __ATTRS_o_ai +vec_lvewx(int __a, const int *__b) +{ + return (vector int)__builtin_altivec_lvewx(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvewx(int __a, const unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvewx(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_lvewx(int __a, const float *__b) +{ + return (vector float)__builtin_altivec_lvewx(__a, __b); +} + +/* vec_ldl: every overload calls __builtin_altivec_lvxl and casts the result to its declared return type */ + +static vector signed char __ATTRS_o_ai +vec_ldl(int __a, const vector signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvxl(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_ldl(int __a, const signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_ldl(int __a, const vector unsigned char *__b)
+{ + return (vector unsigned char)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_ldl(int __a, const unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvxl(__a, __b); +} + +static vector bool char __ATTRS_o_ai +vec_ldl(int __a, const vector bool char *__b) +{ + return (vector bool char)__builtin_altivec_lvxl(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_ldl(int __a, const vector short *__b) +{ + return (vector short)__builtin_altivec_lvxl(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_ldl(int __a, const short *__b) +{ + return (vector short)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_ldl(int __a, const vector unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_ldl(int __a, const unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvxl(__a, __b); +} + +static vector bool short __ATTRS_o_ai +vec_ldl(int __a, const vector bool short *__b) +{ + return (vector bool short)__builtin_altivec_lvxl(__a, __b); +} + +static vector pixel __ATTRS_o_ai +vec_ldl(int __a, const vector pixel *__b) +{ + return (vector pixel)__builtin_altivec_lvxl(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_ldl(int __a, const vector int *__b) +{ + return (vector int)__builtin_altivec_lvxl(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_ldl(int __a, const int *__b) +{ + return (vector int)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_ldl(int __a, const vector unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_ldl(int __a, const unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvxl(__a, __b); +} + +static vector bool int __ATTRS_o_ai +vec_ldl(int __a, const vector bool int *__b) +{ + return (vector bool
int)__builtin_altivec_lvxl(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_ldl(int __a, const vector float *__b) +{ + return (vector float)__builtin_altivec_lvxl(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_ldl(int __a, const float *__b) +{ + return (vector float)__builtin_altivec_lvxl(__a, __b); +} + +/* vec_lvxl: same overload set and bodies as vec_ldl (all call __builtin_altivec_lvxl) */ + +static vector signed char __ATTRS_o_ai +vec_lvxl(int __a, const vector signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvxl(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_lvxl(int __a, const signed char *__b) +{ + return (vector signed char)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvxl(int __a, const vector unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvxl(int __a, const unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvxl(__a, __b); +} + +static vector bool char __ATTRS_o_ai +vec_lvxl(int __a, const vector bool char *__b) +{ + return (vector bool char)__builtin_altivec_lvxl(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_lvxl(int __a, const vector short *__b) +{ + return (vector short)__builtin_altivec_lvxl(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_lvxl(int __a, const short *__b) +{ + return (vector short)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvxl(int __a, const vector unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvxl(int __a, const unsigned short *__b) +{ + return (vector unsigned short)__builtin_altivec_lvxl(__a, __b); +} + +static vector bool short __ATTRS_o_ai +vec_lvxl(int __a, const vector bool short *__b) +{ + return (vector bool short)__builtin_altivec_lvxl(__a, __b); +} + +static vector pixel __ATTRS_o_ai +vec_lvxl(int __a, const vector pixel *__b) +{ + return
(vector pixel)__builtin_altivec_lvxl(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_lvxl(int __a, const vector int *__b) +{ + return (vector int)__builtin_altivec_lvxl(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_lvxl(int __a, const int *__b) +{ + return (vector int)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvxl(int __a, const vector unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvxl(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvxl(int __a, const unsigned int *__b) +{ + return (vector unsigned int)__builtin_altivec_lvxl(__a, __b); +} + +static vector bool int __ATTRS_o_ai +vec_lvxl(int __a, const vector bool int *__b) +{ + return (vector bool int)__builtin_altivec_lvxl(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_lvxl(int __a, const vector float *__b) +{ + return (vector float)__builtin_altivec_lvxl(__a, __b); +} + +static vector float __ATTRS_o_ai +vec_lvxl(int __a, const float *__b) +{ + return (vector float)__builtin_altivec_lvxl(__a, __b); +} + +/* vec_loge: calls the same builtin as vec_vlogefp below */ + +static vector float __attribute__((__always_inline__)) +vec_loge(vector float __a) +{ + return __builtin_altivec_vlogefp(__a); +} + +/* vec_vlogefp */ + +static vector float __attribute__((__always_inline__)) +vec_vlogefp(vector float __a) +{ + return __builtin_altivec_vlogefp(__a); +} + +/* vec_lvsl: every overload calls __builtin_altivec_lvsl and casts the result to vector unsigned char */ + +static vector unsigned char __ATTRS_o_ai +vec_lvsl(int __a, const signed char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsl(int __a, const unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsl(int __a, const short *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsl(int __a, const unsigned short *__b) +{ + return (vector unsigned
char)__builtin_altivec_lvsl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsl(int __a, const int *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsl(int __a, const unsigned int *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsl(int __a, const float *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsl(__a, __b); +} + +/* vec_lvsr: every overload calls __builtin_altivec_lvsr and casts the result to vector unsigned char */ + +static vector unsigned char __ATTRS_o_ai +vec_lvsr(int __a, const signed char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsr(int __a, const unsigned char *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsr(int __a, const short *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsr(int __a, const unsigned short *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsr(int __a, const int *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsr(int __a, const unsigned int *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvsr(int __a, const float *__b) +{ + return (vector unsigned char)__builtin_altivec_lvsr(__a, __b); +} + +/* vec_madd: calls the same builtin as vec_vmaddfp below */ + +static vector float __attribute__((__always_inline__)) +vec_madd(vector float __a, vector float __b, vector float __c) +{ + return __builtin_altivec_vmaddfp(__a, __b, __c); +} + +/* vec_vmaddfp */ + +static vector float __attribute__((__always_inline__)) +vec_vmaddfp(vector float __a, vector float __b, vector float __c) +{ + return __builtin_altivec_vmaddfp(__a,
__b, __c); +} + +/* vec_madds: calls the same builtin as vec_vmhaddshs below */ + +static vector signed short __attribute__((__always_inline__)) +vec_madds(vector signed short __a, vector signed short __b, vector signed short __c) +{ + return __builtin_altivec_vmhaddshs(__a, __b, __c); +} + +/* vec_vmhaddshs */ +static vector signed short __attribute__((__always_inline__)) +vec_vmhaddshs(vector signed short __a, + vector signed short __b, + vector signed short __c) +{ + return __builtin_altivec_vmhaddshs(__a, __b, __c); +} + +/* vec_max: a vector bool operand is cast to the other operand's type before the vmax* builtin for that type is called */ + +static vector signed char __ATTRS_o_ai +vec_max(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vmaxsb(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_max(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vmaxsb((vector signed char)__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_max(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vmaxsb(__a, (vector signed char)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_max(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vmaxub(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_max(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vmaxub((vector unsigned char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_max(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vmaxub(__a, (vector unsigned char)__b); +} + +static vector short __ATTRS_o_ai +vec_max(vector short __a, vector short __b) +{ + return __builtin_altivec_vmaxsh(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_max(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vmaxsh((vector short)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_max(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vmaxsh(__a, (vector short)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_max(vector unsigned short
__a, vector unsigned short __b) +{ + return __builtin_altivec_vmaxuh(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_max(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vmaxuh((vector unsigned short)__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_max(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vmaxuh(__a, (vector unsigned short)__b); +} + +static vector int __ATTRS_o_ai +vec_max(vector int __a, vector int __b) +{ + return __builtin_altivec_vmaxsw(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_max(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vmaxsw((vector int)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_max(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vmaxsw(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_max(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vmaxuw(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_max(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vmaxuw((vector unsigned int)__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_max(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vmaxuw(__a, (vector unsigned int)__b); +} + +static vector float __ATTRS_o_ai +vec_max(vector float __a, vector float __b) +{ + return __builtin_altivec_vmaxfp(__a, __b); +} + +/* vec_vmaxsb */ + +static vector signed char __ATTRS_o_ai +vec_vmaxsb(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vmaxsb(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_vmaxsb(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vmaxsb((vector signed char)__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_vmaxsb(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vmaxsb(__a, (vector signed char)__b); +} + +/*
vec_vmaxub */ + +static vector unsigned char __ATTRS_o_ai +vec_vmaxub(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vmaxub(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vmaxub(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vmaxub((vector unsigned char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vmaxub(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vmaxub(__a, (vector unsigned char)__b); +} + +/* vec_vmaxsh */ + +static vector short __ATTRS_o_ai +vec_vmaxsh(vector short __a, vector short __b) +{ + return __builtin_altivec_vmaxsh(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_vmaxsh(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vmaxsh((vector short)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_vmaxsh(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vmaxsh(__a, (vector short)__b); +} + +/* vec_vmaxuh */ + +static vector unsigned short __ATTRS_o_ai +vec_vmaxuh(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vmaxuh(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vmaxuh(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vmaxuh((vector unsigned short)__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vmaxuh(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vmaxuh(__a, (vector unsigned short)__b); +} + +/* vec_vmaxsw */ + +static vector int __ATTRS_o_ai +vec_vmaxsw(vector int __a, vector int __b) +{ + return __builtin_altivec_vmaxsw(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_vmaxsw(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vmaxsw((vector int)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_vmaxsw(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vmaxsw(__a, (vector int)__b); +} + +/* vec_vmaxuw */ +
+static vector unsigned int __ATTRS_o_ai +vec_vmaxuw(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vmaxuw(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vmaxuw(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vmaxuw((vector unsigned int)__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vmaxuw(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vmaxuw(__a, (vector unsigned int)__b); +} + +/* vec_vmaxfp: single vector float function (not overloaded) that forwards to __builtin_altivec_vmaxfp */ + +static vector float __attribute__((__always_inline__)) +vec_vmaxfp(vector float __a, vector float __b) +{ + return __builtin_altivec_vmaxfp(__a, __b); +} + +/* vec_mergeh: every overload is vec_perm(__a, __b, mask) with a constant byte-index mask that alternates indices 0x00..0x07 with 0x10..0x17 in groups of 1 byte (char), 2 bytes (short, pixel) or 4 bytes (int, float). NOTE(review): indices 0x10 and up presumably select bytes of __b per vec_perm; confirm against the AltiVec PIM */ + +static vector signed char __ATTRS_o_ai +vec_mergeh(vector signed char __a, vector signed char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); +} + +static vector unsigned char __ATTRS_o_ai +vec_mergeh(vector unsigned char __a, vector unsigned char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); +} + +static vector bool char __ATTRS_o_ai +vec_mergeh(vector bool char __a, vector bool char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); +} + +static vector short __ATTRS_o_ai +vec_mergeh(vector short __a, vector short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +static vector unsigned short __ATTRS_o_ai +vec_mergeh(vector unsigned short __a, vector unsigned short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + 
+static vector bool short __ATTRS_o_ai +vec_mergeh(vector bool short __a, vector bool short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +static vector pixel __ATTRS_o_ai +vec_mergeh(vector pixel __a, vector pixel __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +static vector int __ATTRS_o_ai +vec_mergeh(vector int __a, vector int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + +static vector unsigned int __ATTRS_o_ai +vec_mergeh(vector unsigned int __a, vector unsigned int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + +static vector bool int __ATTRS_o_ai +vec_mergeh(vector bool int __a, vector bool int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + +static vector float __ATTRS_o_ai +vec_mergeh(vector float __a, vector float __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + +/* vec_vmrghb: char-vector overloads built on vec_perm with the same 1-byte alternating mask as the char overloads of vec_mergeh; the macro that follows makes the name __builtin_altivec_vmrghb expand to vec_vmrghb */ + +#define __builtin_altivec_vmrghb vec_vmrghb + +static vector signed char __ATTRS_o_ai +vec_vmrghb(vector signed char __a, vector signed char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); +} + +static vector unsigned char __ATTRS_o_ai +vec_vmrghb(vector unsigned char __a, vector unsigned char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + 
(0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); +} + +static vector bool char __ATTRS_o_ai +vec_vmrghb(vector bool char __a, vector bool char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17)); +} + +/* vec_vmrghh: short and pixel overloads built on vec_perm with a mask that alternates 2-byte groups taken from indices 0x00..0x07 and 0x10..0x17; the macro that follows makes the name __builtin_altivec_vmrghh expand to vec_vmrghh */ + +#define __builtin_altivec_vmrghh vec_vmrghh + +static vector short __ATTRS_o_ai +vec_vmrghh(vector short __a, vector short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +static vector unsigned short __ATTRS_o_ai +vec_vmrghh(vector unsigned short __a, vector unsigned short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +static vector bool short __ATTRS_o_ai +vec_vmrghh(vector bool short __a, vector bool short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +static vector pixel __ATTRS_o_ai +vec_vmrghh(vector pixel __a, vector pixel __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, + 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17)); +} + +/* vec_vmrghw: int and float overloads built on vec_perm with a mask that alternates 4-byte groups taken from indices 0x00..0x07 and 0x10..0x17; the macro that follows makes the name __builtin_altivec_vmrghw expand to vec_vmrghw */ + +#define __builtin_altivec_vmrghw vec_vmrghw + +static vector int __ATTRS_o_ai +vec_vmrghw(vector int __a, vector int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + +static vector unsigned int __ATTRS_o_ai +vec_vmrghw(vector unsigned int __a, vector unsigned int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 
0x16, 0x17)); +} + +static vector bool int __ATTRS_o_ai +vec_vmrghw(vector bool int __a, vector bool int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + +static vector float __ATTRS_o_ai +vec_vmrghw(vector float __a, vector float __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17)); +} + +/* vec_mergel: every overload is vec_perm(__a, __b, mask) with a constant byte-index mask that alternates indices 0x08..0x0F with 0x18..0x1F in groups of 1 byte (char), 2 bytes (short, pixel) or 4 bytes (int, float). NOTE(review): indices 0x10 and up presumably select bytes of __b per vec_perm; confirm against the AltiVec PIM */ + +static vector signed char __ATTRS_o_ai +vec_mergel(vector signed char __a, vector signed char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); +} + +static vector unsigned char __ATTRS_o_ai +vec_mergel(vector unsigned char __a, vector unsigned char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); +} + +static vector bool char __ATTRS_o_ai +vec_mergel(vector bool char __a, vector bool char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); +} + +static vector short __ATTRS_o_ai +vec_mergel(vector short __a, vector short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector unsigned short __ATTRS_o_ai +vec_mergel(vector unsigned short __a, vector unsigned short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector bool short __ATTRS_o_ai +vec_mergel(vector bool short __a, vector bool short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 
0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector pixel __ATTRS_o_ai +vec_mergel(vector pixel __a, vector pixel __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector int __ATTRS_o_ai +vec_mergel(vector int __a, vector int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +static vector unsigned int __ATTRS_o_ai +vec_mergel(vector unsigned int __a, vector unsigned int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +static vector bool int __ATTRS_o_ai +vec_mergel(vector bool int __a, vector bool int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +static vector float __ATTRS_o_ai +vec_mergel(vector float __a, vector float __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +/* vec_vmrglb: char-vector overloads built on vec_perm with the same 1-byte alternating mask as the char overloads of vec_mergel; the macro that follows makes the name __builtin_altivec_vmrglb expand to vec_vmrglb */ + +#define __builtin_altivec_vmrglb vec_vmrglb + +static vector signed char __ATTRS_o_ai +vec_vmrglb(vector signed char __a, vector signed char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); +} + +static vector unsigned char __ATTRS_o_ai +vec_vmrglb(vector unsigned char __a, vector unsigned char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); +} + +static vector bool char __ATTRS_o_ai +vec_vmrglb(vector 
bool char __a, vector bool char __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F)); +} + +/* vec_vmrglh: short and pixel overloads built on vec_perm with a mask that alternates 2-byte groups taken from indices 0x08..0x0F and 0x18..0x1F; the macro that follows makes the name __builtin_altivec_vmrglh expand to vec_vmrglh */ + +#define __builtin_altivec_vmrglh vec_vmrglh + +static vector short __ATTRS_o_ai +vec_vmrglh(vector short __a, vector short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector unsigned short __ATTRS_o_ai +vec_vmrglh(vector unsigned short __a, vector unsigned short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector bool short __ATTRS_o_ai +vec_vmrglh(vector bool short __a, vector bool short __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +static vector pixel __ATTRS_o_ai +vec_vmrglh(vector pixel __a, vector pixel __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, + 0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F)); +} + +/* vec_vmrglw: int and float overloads built on vec_perm with a mask that alternates 4-byte groups taken from indices 0x08..0x0F and 0x18..0x1F; the macro that follows makes the name __builtin_altivec_vmrglw expand to vec_vmrglw */ + +#define __builtin_altivec_vmrglw vec_vmrglw + +static vector int __ATTRS_o_ai +vec_vmrglw(vector int __a, vector int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +static vector unsigned int __ATTRS_o_ai +vec_vmrglw(vector unsigned int __a, vector unsigned int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +static vector bool int __ATTRS_o_ai +vec_vmrglw(vector bool int __a, vector bool int __b) +{ + return vec_perm(__a, __b, (vector unsigned char) 
+ (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +static vector float __ATTRS_o_ai +vec_vmrglw(vector float __a, vector float __b) +{ + return vec_perm(__a, __b, (vector unsigned char) + (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F)); +} + +/* vec_mfvscr: takes no arguments and returns the value of __builtin_altivec_mfvscr() as vector unsigned short */ + +static vector unsigned short __attribute__((__always_inline__)) +vec_mfvscr(void) +{ + return __builtin_altivec_mfvscr(); +} + +/* vec_min: overload set that picks the vminsb, vminub, vminsh, vminuh, vminsw, vminuw or vminfp builtin from the operand types; a vector bool operand is cast to the type of the other operand before the call */ + +static vector signed char __ATTRS_o_ai +vec_min(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vminsb(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_min(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vminsb((vector signed char)__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_min(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vminsb(__a, (vector signed char)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_min(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vminub(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_min(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vminub((vector unsigned char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_min(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vminub(__a, (vector unsigned char)__b); +} + +static vector short __ATTRS_o_ai +vec_min(vector short __a, vector short __b) +{ + return __builtin_altivec_vminsh(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_min(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vminsh((vector short)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_min(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vminsh(__a, (vector short)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_min(vector 
unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vminuh(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_min(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vminuh((vector unsigned short)__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_min(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vminuh(__a, (vector unsigned short)__b); +} + +static vector int __ATTRS_o_ai +vec_min(vector int __a, vector int __b) +{ + return __builtin_altivec_vminsw(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_min(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vminsw((vector int)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_min(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vminsw(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_min(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vminuw(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_min(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vminuw((vector unsigned int)__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_min(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vminuw(__a, (vector unsigned int)__b); +} + +static vector float __ATTRS_o_ai +vec_min(vector float __a, vector float __b) +{ + return __builtin_altivec_vminfp(__a, __b); +} + +/* vec_vminsb: overloads that forward to __builtin_altivec_vminsb; a vector bool char operand is cast to vector signed char before the call */ + +static vector signed char __ATTRS_o_ai +vec_vminsb(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vminsb(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_vminsb(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vminsb((vector signed char)__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_vminsb(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vminsb(__a, (vector signed 
char)__b); +} + +/* vec_vminub: overloads that forward to __builtin_altivec_vminub; a vector bool char operand is cast to vector unsigned char before the call */ + +static vector unsigned char __ATTRS_o_ai +vec_vminub(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vminub(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vminub(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vminub((vector unsigned char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vminub(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vminub(__a, (vector unsigned char)__b); +} + +/* vec_vminsh: overloads that forward to __builtin_altivec_vminsh; a vector bool short operand is cast to vector short before the call */ + +static vector short __ATTRS_o_ai +vec_vminsh(vector short __a, vector short __b) +{ + return __builtin_altivec_vminsh(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_vminsh(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vminsh((vector short)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_vminsh(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vminsh(__a, (vector short)__b); +} + +/* vec_vminuh: overloads that forward to __builtin_altivec_vminuh; a vector bool short operand is cast to vector unsigned short before the call */ + +static vector unsigned short __ATTRS_o_ai +vec_vminuh(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vminuh(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vminuh(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vminuh((vector unsigned short)__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vminuh(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vminuh(__a, (vector unsigned short)__b); +} + +/* vec_vminsw: overloads that forward to __builtin_altivec_vminsw; a vector bool int operand is cast to vector int before the call */ + +static vector int __ATTRS_o_ai +vec_vminsw(vector int __a, vector int __b) +{ + return __builtin_altivec_vminsw(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_vminsw(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vminsw((vector int)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_vminsw(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vminsw(__a, (vector int)__b); +} + 
+/* vec_vminuw: overloads that forward to __builtin_altivec_vminuw; a vector bool int operand is cast to vector unsigned int before the call */ + +static vector unsigned int __ATTRS_o_ai +vec_vminuw(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vminuw(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vminuw(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vminuw((vector unsigned int)__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vminuw(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vminuw(__a, (vector unsigned int)__b); +} + +/* vec_vminfp: single vector float function (not overloaded) that forwards to __builtin_altivec_vminfp */ + +static vector float __attribute__((__always_inline__)) +vec_vminfp(vector float __a, vector float __b) +{ + return __builtin_altivec_vminfp(__a, __b); +} + +/* vec_mladd: written with plain vector arithmetic, __a * __b + __c, rather than a builtin call; overloads that mix signed and unsigned short cast the unsigned operands to vector short and return vector short. The macro that follows makes the name __builtin_altivec_vmladduhm expand to vec_mladd */ + +#define __builtin_altivec_vmladduhm vec_mladd + +static vector short __ATTRS_o_ai +vec_mladd(vector short __a, vector short __b, vector short __c) +{ + return __a * __b + __c; +} + +static vector short __ATTRS_o_ai +vec_mladd(vector short __a, vector unsigned short __b, vector unsigned short __c) +{ + return __a * (vector short)__b + (vector short)__c; +} + +static vector short __ATTRS_o_ai +vec_mladd(vector unsigned short __a, vector short __b, vector short __c) +{ + return (vector short)__a * __b + __c; +} + +static vector unsigned short __ATTRS_o_ai +vec_mladd(vector unsigned short __a, + vector unsigned short __b, + vector unsigned short __c) +{ + return __a * __b + __c; +} + +/* vec_vmladduhm: same four overloads and the same __a * __b + __c arithmetic as vec_mladd, under the instruction-style name */ + +static vector short __ATTRS_o_ai +vec_vmladduhm(vector short __a, vector short __b, vector short __c) +{ + return __a * __b + __c; +} + +static vector short __ATTRS_o_ai +vec_vmladduhm(vector short __a, vector unsigned short __b, vector unsigned short __c) +{ + return __a * (vector short)__b + (vector short)__c; +} + +static vector short __ATTRS_o_ai +vec_vmladduhm(vector unsigned short __a, vector short __b, vector short __c) +{ + return (vector short)__a * __b + __c; +} + +static vector unsigned short __ATTRS_o_ai +vec_vmladduhm(vector unsigned short __a, + 
vector unsigned short __b, + vector unsigned short __c) +{ + return __a * __b + __c; +} + +/* vec_mradds: single vector short function that forwards its three operands to __builtin_altivec_vmhraddshs */ + +static vector short __attribute__((__always_inline__)) +vec_mradds(vector short __a, vector short __b, vector short __c) +{ + return __builtin_altivec_vmhraddshs(__a, __b, __c); +} + +/* vec_vmhraddshs: same body as vec_mradds, under the instruction-style name */ + +static vector short __attribute__((__always_inline__)) +vec_vmhraddshs(vector short __a, vector short __b, vector short __c) +{ + return __builtin_altivec_vmhraddshs(__a, __b, __c); +} + +/* vec_msum: overload set that picks the vmsummbm, vmsumubm, vmsumshm or vmsumuhm builtin from the operand types; the third operand and the result are int vectors */ + +static vector int __ATTRS_o_ai +vec_msum(vector signed char __a, vector unsigned char __b, vector int __c) +{ + return __builtin_altivec_vmsummbm(__a, __b, __c); +} + +static vector unsigned int __ATTRS_o_ai +vec_msum(vector unsigned char __a, vector unsigned char __b, vector unsigned int __c) +{ + return __builtin_altivec_vmsumubm(__a, __b, __c); +} + +static vector int __ATTRS_o_ai +vec_msum(vector short __a, vector short __b, vector int __c) +{ + return __builtin_altivec_vmsumshm(__a, __b, __c); +} + +static vector unsigned int __ATTRS_o_ai +vec_msum(vector unsigned short __a, + vector unsigned short __b, + vector unsigned int __c) +{ + return __builtin_altivec_vmsumuhm(__a, __b, __c); +} + +/* vec_vmsummbm: direct wrapper over __builtin_altivec_vmsummbm (signed char, unsigned char, int operands) */ + +static vector int __attribute__((__always_inline__)) +vec_vmsummbm(vector signed char __a, vector unsigned char __b, vector int __c) +{ + return __builtin_altivec_vmsummbm(__a, __b, __c); +} + +/* vec_vmsumubm: direct wrapper over __builtin_altivec_vmsumubm (unsigned char, unsigned char, unsigned int operands) */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vmsumubm(vector unsigned char __a, + vector unsigned char __b, + vector unsigned int __c) +{ + return __builtin_altivec_vmsumubm(__a, __b, __c); +} + +/* vec_vmsumshm: direct wrapper over __builtin_altivec_vmsumshm (short, short, int operands) */ + +static vector int __attribute__((__always_inline__)) +vec_vmsumshm(vector short __a, vector short __b, vector int __c) +{ + return __builtin_altivec_vmsumshm(__a, __b, __c); +} + +/* vec_vmsumuhm: direct wrapper over __builtin_altivec_vmsumuhm (unsigned short, unsigned short, unsigned int operands) */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vmsumuhm(vector 
unsigned short __a, + vector unsigned short __b, + vector unsigned int __c) +{ + return __builtin_altivec_vmsumuhm(__a, __b, __c); +} + +/* vec_msums: overload set that picks __builtin_altivec_vmsumshs for signed short operands and __builtin_altivec_vmsumuhs for unsigned short operands */ + +static vector int __ATTRS_o_ai +vec_msums(vector short __a, vector short __b, vector int __c) +{ + return __builtin_altivec_vmsumshs(__a, __b, __c); +} + +static vector unsigned int __ATTRS_o_ai +vec_msums(vector unsigned short __a, + vector unsigned short __b, + vector unsigned int __c) +{ + return __builtin_altivec_vmsumuhs(__a, __b, __c); +} + +/* vec_vmsumshs: direct wrapper over __builtin_altivec_vmsumshs (short, short, int operands) */ + +static vector int __attribute__((__always_inline__)) +vec_vmsumshs(vector short __a, vector short __b, vector int __c) +{ + return __builtin_altivec_vmsumshs(__a, __b, __c); +} + +/* vec_vmsumuhs: direct wrapper over __builtin_altivec_vmsumuhs (unsigned short, unsigned short, unsigned int operands) */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vmsumuhs(vector unsigned short __a, + vector unsigned short __b, + vector unsigned int __c) +{ + return __builtin_altivec_vmsumuhs(__a, __b, __c); +} + +/* vec_mtvscr: one overload per vector type; each casts its argument to vector int, passes it to __builtin_altivec_mtvscr and returns nothing */ + +static void __ATTRS_o_ai +vec_mtvscr(vector signed char __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector unsigned char __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector bool char __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector short __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector unsigned short __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector bool short __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector pixel __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector int __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector unsigned int __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void 
__ATTRS_o_ai +vec_mtvscr(vector bool int __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +static void __ATTRS_o_ai +vec_mtvscr(vector float __a) +{ + __builtin_altivec_mtvscr((vector int)__a); +} + +/* vec_mule: overload set that picks the vmulesb, vmuleub, vmulesh or vmuleuh builtin from the operand types; the declared result element type is one size wider than the operands (short from char, int from short) */ + +static vector short __ATTRS_o_ai +vec_mule(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vmulesb(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_mule(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vmuleub(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_mule(vector short __a, vector short __b) +{ + return __builtin_altivec_vmulesh(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_mule(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vmuleuh(__a, __b); +} + +/* vec_vmulesb: direct wrapper over __builtin_altivec_vmulesb (signed char operands, vector short result) */ + +static vector short __attribute__((__always_inline__)) +vec_vmulesb(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vmulesb(__a, __b); +} + +/* vec_vmuleub: direct wrapper over __builtin_altivec_vmuleub (unsigned char operands, vector unsigned short result) */ + +static vector unsigned short __attribute__((__always_inline__)) +vec_vmuleub(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vmuleub(__a, __b); +} + +/* vec_vmulesh: direct wrapper over __builtin_altivec_vmulesh (short operands, vector int result) */ + +static vector int __attribute__((__always_inline__)) +vec_vmulesh(vector short __a, vector short __b) +{ + return __builtin_altivec_vmulesh(__a, __b); +} + +/* vec_vmuleuh: direct wrapper over __builtin_altivec_vmuleuh (unsigned short operands, vector unsigned int result) */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vmuleuh(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vmuleuh(__a, __b); +} + +/* vec_mulo: overload set that picks the vmulosb, vmuloub, vmulosh or vmulouh builtin from the operand types; the declared result element type is one size wider than the operands (short from char, int from short) */ + +static vector short __ATTRS_o_ai +vec_mulo(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vmulosb(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_mulo(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vmuloub(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_mulo(vector short __a, vector short 
__b) +{ + return __builtin_altivec_vmulosh(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_mulo(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vmulouh(__a, __b); +} + +/* vec_vmulosb: direct wrapper over __builtin_altivec_vmulosb (signed char operands, vector short result) */ + +static vector short __attribute__((__always_inline__)) +vec_vmulosb(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vmulosb(__a, __b); +} + +/* vec_vmuloub: direct wrapper over __builtin_altivec_vmuloub (unsigned char operands, vector unsigned short result) */ + +static vector unsigned short __attribute__((__always_inline__)) +vec_vmuloub(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vmuloub(__a, __b); +} + +/* vec_vmulosh: direct wrapper over __builtin_altivec_vmulosh (short operands, vector int result) */ + +static vector int __attribute__((__always_inline__)) +vec_vmulosh(vector short __a, vector short __b) +{ + return __builtin_altivec_vmulosh(__a, __b); +} + +/* vec_vmulouh: direct wrapper over __builtin_altivec_vmulouh (unsigned short operands, vector unsigned int result) */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vmulouh(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vmulouh(__a, __b); +} + +/* vec_nmsub: single vector float function that forwards its three operands to __builtin_altivec_vnmsubfp */ + +static vector float __attribute__((__always_inline__)) +vec_nmsub(vector float __a, vector float __b, vector float __c) +{ + return __builtin_altivec_vnmsubfp(__a, __b, __c); +} + +/* vec_vnmsubfp: same body as vec_nmsub, under the instruction-style name */ + +static vector float __attribute__((__always_inline__)) +vec_vnmsubfp(vector float __a, vector float __b, vector float __c) +{ + return __builtin_altivec_vnmsubfp(__a, __b, __c); +} + +/* vec_nor: written with vector operators as ~(__a | __b) rather than a builtin call; the float overload does the bit operation on vector unsigned int casts and casts the result back. The macro that follows makes the name __builtin_altivec_vnor expand to vec_nor */ + +#define __builtin_altivec_vnor vec_nor + +static vector signed char __ATTRS_o_ai +vec_nor(vector signed char __a, vector signed char __b) +{ + return ~(__a | __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_nor(vector unsigned char __a, vector unsigned char __b) +{ + return ~(__a | __b); +} + +static vector bool char __ATTRS_o_ai +vec_nor(vector bool char __a, vector bool char __b) +{ + return ~(__a | __b); +} + +static vector short __ATTRS_o_ai +vec_nor(vector short __a, vector short __b) +{ + return ~(__a | __b); +} + +static vector unsigned short __ATTRS_o_ai 
+vec_nor(vector unsigned short __a, vector unsigned short __b) +{ + return ~(__a | __b); +} + +static vector bool short __ATTRS_o_ai +vec_nor(vector bool short __a, vector bool short __b) +{ + return ~(__a | __b); +} + +static vector int __ATTRS_o_ai +vec_nor(vector int __a, vector int __b) +{ + return ~(__a | __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_nor(vector unsigned int __a, vector unsigned int __b) +{ + return ~(__a | __b); +} + +static vector bool int __ATTRS_o_ai +vec_nor(vector bool int __a, vector bool int __b) +{ + return ~(__a | __b); +} + +static vector float __ATTRS_o_ai +vec_nor(vector float __a, vector float __b) +{ + vector unsigned int __res = ~((vector unsigned int)__a | (vector unsigned int)__b); + return (vector float)__res; +} + +/* vec_vnor: same overload set and the same ~(__a | __b) bodies as vec_nor, under the instruction-style name; the float overload works on vector unsigned int casts and casts the result back */ + +static vector signed char __ATTRS_o_ai +vec_vnor(vector signed char __a, vector signed char __b) +{ + return ~(__a | __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vnor(vector unsigned char __a, vector unsigned char __b) +{ + return ~(__a | __b); +} + +static vector bool char __ATTRS_o_ai +vec_vnor(vector bool char __a, vector bool char __b) +{ + return ~(__a | __b); +} + +static vector short __ATTRS_o_ai +vec_vnor(vector short __a, vector short __b) +{ + return ~(__a | __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vnor(vector unsigned short __a, vector unsigned short __b) +{ + return ~(__a | __b); +} + +static vector bool short __ATTRS_o_ai +vec_vnor(vector bool short __a, vector bool short __b) +{ + return ~(__a | __b); +} + +static vector int __ATTRS_o_ai +vec_vnor(vector int __a, vector int __b) +{ + return ~(__a | __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vnor(vector unsigned int __a, vector unsigned int __b) +{ + return ~(__a | __b); +} + +static vector bool int __ATTRS_o_ai +vec_vnor(vector bool int __a, vector bool int __b) +{ + return ~(__a | __b); +} + +static vector float __ATTRS_o_ai +vec_vnor(vector float __a, vector float __b) +{ + 
vector unsigned int __res = ~((vector unsigned int)__a | (vector unsigned int)__b); + return (vector float)__res; +} + +/* vec_or: written with the vector operator as __a | __b rather than a builtin call; when a vector bool operand is mixed with a signed or unsigned operand it is cast to that operand's type, and the float overloads do the bit operation on vector unsigned int casts. The macro that follows makes the name __builtin_altivec_vor expand to vec_or */ + +#define __builtin_altivec_vor vec_or + +static vector signed char __ATTRS_o_ai +vec_or(vector signed char __a, vector signed char __b) +{ + return __a | __b; +} + +static vector signed char __ATTRS_o_ai +vec_or(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a | __b; +} + +static vector signed char __ATTRS_o_ai +vec_or(vector signed char __a, vector bool char __b) +{ + return __a | (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_or(vector unsigned char __a, vector unsigned char __b) +{ + return __a | __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_or(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a | __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_or(vector unsigned char __a, vector bool char __b) +{ + return __a | (vector unsigned char)__b; +} + +static vector bool char __ATTRS_o_ai +vec_or(vector bool char __a, vector bool char __b) +{ + return __a | __b; +} + +static vector short __ATTRS_o_ai +vec_or(vector short __a, vector short __b) +{ + return __a | __b; +} + +static vector short __ATTRS_o_ai +vec_or(vector bool short __a, vector short __b) +{ + return (vector short)__a | __b; +} + +static vector short __ATTRS_o_ai +vec_or(vector short __a, vector bool short __b) +{ + return __a | (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_or(vector unsigned short __a, vector unsigned short __b) +{ + return __a | __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_or(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a | __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_or(vector unsigned short __a, vector bool short __b) +{ + return __a | (vector unsigned short)__b; +} + +static vector bool short __ATTRS_o_ai +vec_or(vector bool short __a, vector 
bool short __b) +{ + return __a | __b; +} + +static vector int __ATTRS_o_ai +vec_or(vector int __a, vector int __b) +{ + return __a | __b; +} + +static vector int __ATTRS_o_ai +vec_or(vector bool int __a, vector int __b) +{ + return (vector int)__a | __b; +} + +static vector int __ATTRS_o_ai +vec_or(vector int __a, vector bool int __b) +{ + return __a | (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_or(vector unsigned int __a, vector unsigned int __b) +{ + return __a | __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_or(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a | __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_or(vector unsigned int __a, vector bool int __b) +{ + return __a | (vector unsigned int)__b; +} + +static vector bool int __ATTRS_o_ai +vec_or(vector bool int __a, vector bool int __b) +{ + return __a | __b; +} + +static vector float __ATTRS_o_ai +vec_or(vector float __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_or(vector bool int __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_or(vector float __a, vector bool int __b) +{ + vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; + return (vector float)__res; +} + +/* vec_vor: same overload set and the same __a | __b bodies as vec_or, under the instruction-style name; a vector bool operand mixed with a signed or unsigned operand is cast to that operand's type, and the float overloads work on vector unsigned int casts */ + +static vector signed char __ATTRS_o_ai +vec_vor(vector signed char __a, vector signed char __b) +{ + return __a | __b; +} + +static vector signed char __ATTRS_o_ai +vec_vor(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a | __b; +} + +static vector signed char __ATTRS_o_ai +vec_vor(vector signed char __a, vector bool char __b) +{ + return __a | (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vor(vector 
unsigned char __a, vector unsigned char __b) +{ + return __a | __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vor(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a | __b; /* the bool operand is cast to the result type before the OR */ +} + +static vector unsigned char __ATTRS_o_ai +vec_vor(vector unsigned char __a, vector bool char __b) +{ + return __a | (vector unsigned char)__b; +} + +static vector bool char __ATTRS_o_ai +vec_vor(vector bool char __a, vector bool char __b) +{ + return __a | __b; /* both operands are bool vectors, so the result stays a bool vector */ +} + +static vector short __ATTRS_o_ai +vec_vor(vector short __a, vector short __b) +{ + return __a | __b; +} + +static vector short __ATTRS_o_ai +vec_vor(vector bool short __a, vector short __b) +{ + return (vector short)__a | __b; +} + +static vector short __ATTRS_o_ai +vec_vor(vector short __a, vector bool short __b) +{ + return __a | (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vor(vector unsigned short __a, vector unsigned short __b) +{ + return __a | __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vor(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a | __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vor(vector unsigned short __a, vector bool short __b) +{ + return __a | (vector unsigned short)__b; +} + +static vector bool short __ATTRS_o_ai +vec_vor(vector bool short __a, vector bool short __b) +{ + return __a | __b; +} + +static vector int __ATTRS_o_ai +vec_vor(vector int __a, vector int __b) +{ + return __a | __b; +} + +static vector int __ATTRS_o_ai +vec_vor(vector bool int __a, vector int __b) +{ + return (vector int)__a | __b; +} + +static vector int __ATTRS_o_ai +vec_vor(vector int __a, vector bool int __b) +{ + return __a | (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vor(vector unsigned int __a, vector unsigned int __b) +{ + return __a | __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vor(vector bool int __a, vector unsigned int __b) +{ + return (vector
unsigned int)__a | __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vor(vector unsigned int __a, vector bool int __b) +{ + return __a | (vector unsigned int)__b; +} + +static vector bool int __ATTRS_o_ai +vec_vor(vector bool int __a, vector bool int __b) +{ + return __a | __b; +} + +static vector float __ATTRS_o_ai +vec_vor(vector float __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; /* float operands are OR-ed as vector unsigned int */ + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vor(vector bool int __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vor(vector float __a, vector bool int __b) +{ + vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b; + return (vector float)__res; +} + +/* vec_pack: builds its result with vec_perm over __a and __b and casts it to the narrower element type. The 16-bit-element overloads pass the odd byte indices 0x01, 0x03, ... 0x1F as the permute control vector; the 32-bit-element overloads pass the index pairs 0x02,0x03, 0x06,0x07, ... 0x1E,0x1F. NOTE(review): these index patterns presumably assume big-endian element layout (selecting the low-order half of each element) - confirm before using on a little-endian target. */ + +static vector signed char __ATTRS_o_ai +vec_pack(vector signed short __a, vector signed short __b) +{ + return (vector signed char)vec_perm(__a, __b, (vector unsigned char) + (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); +} + +static vector unsigned char __ATTRS_o_ai +vec_pack(vector unsigned short __a, vector unsigned short __b) +{ + return (vector unsigned char)vec_perm(__a, __b, (vector unsigned char) + (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); +} + +static vector bool char __ATTRS_o_ai +vec_pack(vector bool short __a, vector bool short __b) +{ + return (vector bool char)vec_perm(__a, __b, (vector unsigned char) + (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); +} + +static vector short __ATTRS_o_ai +vec_pack(vector int __a, vector int __b) +{ + return (vector short)vec_perm(__a, __b, (vector unsigned char) + (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 
0x1F)); +} + +static vector unsigned short __ATTRS_o_ai +vec_pack(vector unsigned int __a, vector unsigned int __b) +{ + return (vector unsigned short)vec_perm(__a, __b, (vector unsigned char) + (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); +} + +static vector bool short __ATTRS_o_ai +vec_pack(vector bool int __a, vector bool int __b) +{ + return (vector bool short)vec_perm(__a, __b, (vector unsigned char) + (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); +} + +/* vec_vpkuhum: same vec_perm-based bodies (odd byte indices 0x01 .. 0x1F) as the 16-bit-element vec_pack overloads. */ + +#define __builtin_altivec_vpkuhum vec_vpkuhum /* the builtin name expands to the overloads below */ + +static vector signed char __ATTRS_o_ai +vec_vpkuhum(vector signed short __a, vector signed short __b) +{ + return (vector signed char)vec_perm(__a, __b, (vector unsigned char) + (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); +} + +static vector unsigned char __ATTRS_o_ai +vec_vpkuhum(vector unsigned short __a, vector unsigned short __b) +{ + return (vector unsigned char)vec_perm(__a, __b, (vector unsigned char) + (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); +} + +static vector bool char __ATTRS_o_ai +vec_vpkuhum(vector bool short __a, vector bool short __b) +{ + return (vector bool char)vec_perm(__a, __b, (vector unsigned char) + (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); +} + +/* vec_vpkuwum: same vec_perm-based bodies (index pairs 0x02,0x03 .. 0x1E,0x1F) as the 32-bit-element vec_pack overloads. */ + +#define __builtin_altivec_vpkuwum vec_vpkuwum /* the builtin name expands to the overloads below */ + +static vector short __ATTRS_o_ai +vec_vpkuwum(vector int __a, vector int __b) +{ + return (vector short)vec_perm(__a, __b, (vector unsigned char) + (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); +} + +static vector unsigned short __ATTRS_o_ai +vec_vpkuwum(vector unsigned int __a, vector unsigned int __b) +{ + return (vector unsigned short)vec_perm(__a, __b, (vector unsigned 
char) + (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); +} + +static vector bool short __ATTRS_o_ai +vec_vpkuwum(vector bool int __a, vector bool int __b) +{ + return (vector bool short)vec_perm(__a, __b, (vector unsigned char) + (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, + 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); +} + +/* vec_packpx: forwards both operands to __builtin_altivec_vpkpx and casts the result to vector pixel. Not overloaded (always_inline only). */ + +static vector pixel __attribute__((__always_inline__)) +vec_packpx(vector unsigned int __a, vector unsigned int __b) +{ + return (vector pixel)__builtin_altivec_vpkpx(__a, __b); +} + +/* vec_vpkpx: identical body to vec_packpx under the instruction-style name. */ + +static vector pixel __attribute__((__always_inline__)) +vec_vpkpx(vector unsigned int __a, vector unsigned int __b) +{ + return (vector pixel)__builtin_altivec_vpkpx(__a, __b); +} + +/* vec_packs: overloaded on operand type; dispatches to the vpkshss (short), vpkuhus (unsigned short), vpkswss (int) and vpkuwus (unsigned int) builtins and returns the builtin's result unchanged. */ + +static vector signed char __ATTRS_o_ai +vec_packs(vector short __a, vector short __b) +{ + return __builtin_altivec_vpkshss(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_packs(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vpkuhus(__a, __b); +} + +static vector signed short __ATTRS_o_ai +vec_packs(vector int __a, vector int __b) +{ + return __builtin_altivec_vpkswss(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_packs(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vpkuwus(__a, __b); +} + +/* vec_vpkshss: direct, non-overloaded wrapper around __builtin_altivec_vpkshss. */ + +static vector signed char __attribute__((__always_inline__)) +vec_vpkshss(vector short __a, vector short __b) +{ + return __builtin_altivec_vpkshss(__a, __b); +} + +/* vec_vpkuhus: direct, non-overloaded wrapper around __builtin_altivec_vpkuhus. */ + +static vector unsigned char __attribute__((__always_inline__)) +vec_vpkuhus(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vpkuhus(__a, __b); +} + +/* vec_vpkswss: direct, non-overloaded wrapper around __builtin_altivec_vpkswss. */ + +static vector signed short __attribute__((__always_inline__)) +vec_vpkswss(vector int __a, vector int __b) +{ + return __builtin_altivec_vpkswss(__a, __b); +} + +/* 
vec_vpkuwus */ + +static vector unsigned short __attribute__((__always_inline__)) +vec_vpkuwus(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vpkuwus(__a, __b); +} + +/* vec_packsu: every overload returns an unsigned vector. Signed operands go to the vpkshus / vpkswus builtins; unsigned operands go to vpkuhus / vpkuwus. */ + +static vector unsigned char __ATTRS_o_ai +vec_packsu(vector short __a, vector short __b) +{ + return __builtin_altivec_vpkshus(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_packsu(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vpkuhus(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_packsu(vector int __a, vector int __b) +{ + return __builtin_altivec_vpkswus(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_packsu(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vpkuwus(__a, __b); +} + +/* vec_vpkshus: overloaded. Note that the unsigned short overload calls __builtin_altivec_vpkuhus, not vpkshus, mirroring the 16-bit vec_packsu overloads. */ + +static vector unsigned char __ATTRS_o_ai +vec_vpkshus(vector short __a, vector short __b) +{ + return __builtin_altivec_vpkshus(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vpkshus(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vpkuhus(__a, __b); +} + +/* vec_vpkswus: overloaded. Note that the unsigned int overload calls __builtin_altivec_vpkuwus, not vpkswus, mirroring the 32-bit vec_packsu overloads. */ + +static vector unsigned short __ATTRS_o_ai +vec_vpkswus(vector int __a, vector int __b) +{ + return __builtin_altivec_vpkswus(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vpkswus(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vpkuwus(__a, __b); +} + +/* vec_perm: every overload casts __a and __b to vector int where they are not already, calls __builtin_altivec_vperm_4si with the control vector __c, and casts the result back to the operand type. These definitions carry no storage-class specifier of their own. NOTE(review): presumably matching static prototypes appear earlier in the file (vec_pack above already calls vec_perm) - confirm. */ + +vector signed char __ATTRS_o_ai +vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c) +{ + return (vector signed char) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector unsigned char __ATTRS_o_ai +vec_perm(vector unsigned char __a, + vector unsigned char __b, + vector unsigned char __c) +{ + return (vector unsigned char) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector bool char __ATTRS_o_ai 
+vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c) +{ + return (vector bool char) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector short __ATTRS_o_ai +vec_perm(vector short __a, vector short __b, vector unsigned char __c) +{ + return (vector short) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector unsigned short __ATTRS_o_ai +vec_perm(vector unsigned short __a, + vector unsigned short __b, + vector unsigned char __c) +{ + return (vector unsigned short) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector bool short __ATTRS_o_ai +vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c) +{ + return (vector bool short) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector pixel __ATTRS_o_ai +vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c) +{ + return (vector pixel) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector int __ATTRS_o_ai +vec_perm(vector int __a, vector int __b, vector unsigned char __c) +{ + return (vector int)__builtin_altivec_vperm_4si(__a, __b, __c); /* operands are already vector int, so no argument casts are needed */ +} + +vector unsigned int __ATTRS_o_ai +vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c) +{ + return (vector unsigned int) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector bool int __ATTRS_o_ai +vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c) +{ + return (vector bool int) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +vector float __ATTRS_o_ai +vec_perm(vector float __a, vector float __b, vector unsigned char __c) +{ + return (vector float) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +/* vec_vperm: same overload set and bodies as vec_perm, but each definition here is declared static. */ + +static vector signed char __ATTRS_o_ai +vec_vperm(vector signed char __a, vector signed char __b, vector unsigned char 
__c) +{ + return (vector signed char) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +static vector unsigned char __ATTRS_o_ai +vec_vperm(vector unsigned char __a, + vector unsigned char __b, + vector unsigned char __c) +{ + return (vector unsigned char) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); /* data operands are cast to vector int; the control vector __c is passed through unchanged */ +} + +static vector bool char __ATTRS_o_ai +vec_vperm(vector bool char __a, vector bool char __b, vector unsigned char __c) +{ + return (vector bool char) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +static vector short __ATTRS_o_ai +vec_vperm(vector short __a, vector short __b, vector unsigned char __c) +{ + return (vector short) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +static vector unsigned short __ATTRS_o_ai +vec_vperm(vector unsigned short __a, + vector unsigned short __b, + vector unsigned char __c) +{ + return (vector unsigned short) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +static vector bool short __ATTRS_o_ai +vec_vperm(vector bool short __a, vector bool short __b, vector unsigned char __c) +{ + return (vector bool short) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +static vector pixel __ATTRS_o_ai +vec_vperm(vector pixel __a, vector pixel __b, vector unsigned char __c) +{ + return (vector pixel) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +static vector int __ATTRS_o_ai +vec_vperm(vector int __a, vector int __b, vector unsigned char __c) +{ + return (vector int)__builtin_altivec_vperm_4si(__a, __b, __c); /* operands are already vector int, so no argument casts are needed */ +} + +static vector unsigned int __ATTRS_o_ai +vec_vperm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c) +{ + return (vector unsigned int) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +static vector bool int __ATTRS_o_ai +vec_vperm(vector bool int __a, vector bool int __b, vector unsigned 
char __c) +{ + return (vector bool int) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +static vector float __ATTRS_o_ai +vec_vperm(vector float __a, vector float __b, vector unsigned char __c) +{ + return (vector float) + __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c); +} + +/* vec_re: direct, non-overloaded wrapper around __builtin_altivec_vrefp. */ + +static vector float __attribute__((__always_inline__)) +vec_re(vector float __a) +{ + return __builtin_altivec_vrefp(__a); +} + +/* vec_vrefp: identical body to vec_re under the instruction-style name. */ + +static vector float __attribute__((__always_inline__)) +vec_vrefp(vector float __a) +{ + return __builtin_altivec_vrefp(__a); +} + +/* vec_rl: dispatches by element width to the vrlb / vrlh / vrlw builtins. Where the overload's operand type differs from the type the builtin is called with, __a is cast on the way in and the result is cast back to the type of __a. */ + +static vector signed char __ATTRS_o_ai +vec_rl(vector signed char __a, vector unsigned char __b) +{ + return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_rl(vector unsigned char __a, vector unsigned char __b) +{ + return (vector unsigned char)__builtin_altivec_vrlb((vector char)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_rl(vector short __a, vector unsigned short __b) +{ + return __builtin_altivec_vrlh(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_rl(vector unsigned short __a, vector unsigned short __b) +{ + return (vector unsigned short)__builtin_altivec_vrlh((vector short)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_rl(vector int __a, vector unsigned int __b) +{ + return __builtin_altivec_vrlw(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_rl(vector unsigned int __a, vector unsigned int __b) +{ + return (vector unsigned int)__builtin_altivec_vrlw((vector int)__a, __b); +} + +/* vec_vrlb: the two 8-bit-element overloads of vec_rl, with the same bodies, under the instruction-style name. */ + +static vector signed char __ATTRS_o_ai +vec_vrlb(vector signed char __a, vector unsigned char __b) +{ + return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vrlb(vector unsigned char __a, vector unsigned char __b) +{ + return (vector unsigned 
char)__builtin_altivec_vrlb((vector char)__a, __b); +} + +/* vec_vrlh: the two 16-bit-element overloads of vec_rl, with the same bodies, under the instruction-style name. */ + +static vector short __ATTRS_o_ai +vec_vrlh(vector short __a, vector unsigned short __b) +{ + return __builtin_altivec_vrlh(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vrlh(vector unsigned short __a, vector unsigned short __b) +{ + return (vector unsigned short)__builtin_altivec_vrlh((vector short)__a, __b); +} + +/* vec_vrlw: the two 32-bit-element overloads of vec_rl, with the same bodies, under the instruction-style name. */ + +static vector int __ATTRS_o_ai +vec_vrlw(vector int __a, vector unsigned int __b) +{ + return __builtin_altivec_vrlw(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vrlw(vector unsigned int __a, vector unsigned int __b) +{ + return (vector unsigned int)__builtin_altivec_vrlw((vector int)__a, __b); +} + +/* vec_round: direct, non-overloaded wrapper around __builtin_altivec_vrfin. */ + +static vector float __attribute__((__always_inline__)) +vec_round(vector float __a) +{ + return __builtin_altivec_vrfin(__a); +} + +/* vec_vrfin: identical body to vec_round under the instruction-style name. */ + +static vector float __attribute__((__always_inline__)) +vec_vrfin(vector float __a) +{ + return __builtin_altivec_vrfin(__a); +} + +/* vec_rsqrte: direct, non-overloaded wrapper around __builtin_altivec_vrsqrtefp. The return type is spelled __vector here, unlike the plain vector spelling used by the neighbouring functions. */ + +static __vector float __attribute__((__always_inline__)) +vec_rsqrte(vector float __a) +{ + return __builtin_altivec_vrsqrtefp(__a); +} + +/* vec_vrsqrtefp: identical body to vec_rsqrte under the instruction-style name. */ + +static __vector float __attribute__((__always_inline__)) +vec_vrsqrtefp(vector float __a) +{ + return __builtin_altivec_vrsqrtefp(__a); +} + +/* vec_sel: bitwise select computed as (__a & ~__c) | (__b & __c), so each result bit comes from __b where the matching bit of __c is 1 and from __a where it is 0. The mask __c is cast to the operand type when the two types differ. */ + +#define __builtin_altivec_vsel_4si vec_sel /* the builtin name expands to the vec_sel overload set below */ + +static vector signed char __ATTRS_o_ai +vec_sel(vector signed char __a, vector signed char __b, vector unsigned char __c) +{ + return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c); +} + +static vector signed char __ATTRS_o_ai +vec_sel(vector signed char __a, vector signed char __b, vector bool char __c) +{ + return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c); +} + +static vector unsigned char __ATTRS_o_ai +vec_sel(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) +{ + return (__a & 
~__c) | (__b & __c); +} + +static vector unsigned char __ATTRS_o_ai +vec_sel(vector unsigned char __a, vector unsigned char __b, vector bool char __c) +{ + return (__a & ~(vector unsigned char)__c) | (__b & (vector unsigned char)__c); /* the bool mask is cast to the operand type before it is applied */ +} + +static vector bool char __ATTRS_o_ai +vec_sel(vector bool char __a, vector bool char __b, vector unsigned char __c) +{ + return (__a & ~(vector bool char)__c) | (__b & (vector bool char)__c); +} + +static vector bool char __ATTRS_o_ai +vec_sel(vector bool char __a, vector bool char __b, vector bool char __c) +{ + return (__a & ~__c) | (__b & __c); /* mask and operands share a type, so no cast is needed */ +} + +static vector short __ATTRS_o_ai +vec_sel(vector short __a, vector short __b, vector unsigned short __c) +{ + return (__a & ~(vector short)__c) | (__b & (vector short)__c); +} + +static vector short __ATTRS_o_ai +vec_sel(vector short __a, vector short __b, vector bool short __c) +{ + return (__a & ~(vector short)__c) | (__b & (vector short)__c); +} + +static vector unsigned short __ATTRS_o_ai +vec_sel(vector unsigned short __a, + vector unsigned short __b, + vector unsigned short __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector unsigned short __ATTRS_o_ai +vec_sel(vector unsigned short __a, vector unsigned short __b, vector bool short __c) +{ + return (__a & ~(vector unsigned short)__c) | (__b & (vector unsigned short)__c); +} + +static vector bool short __ATTRS_o_ai +vec_sel(vector bool short __a, vector bool short __b, vector unsigned short __c) +{ + return (__a & ~(vector bool short)__c) | (__b & (vector bool short)__c); +} + +static vector bool short __ATTRS_o_ai +vec_sel(vector bool short __a, vector bool short __b, vector bool short __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector int __ATTRS_o_ai +vec_sel(vector int __a, vector int __b, vector unsigned int __c) +{ + return (__a & ~(vector int)__c) | (__b & (vector int)__c); +} + +static vector int __ATTRS_o_ai +vec_sel(vector int __a, vector int __b, vector bool int __c) +{ + return (__a & 
~(vector int)__c) | (__b & (vector int)__c); +} + +static vector unsigned int __ATTRS_o_ai +vec_sel(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector unsigned int __ATTRS_o_ai +vec_sel(vector unsigned int __a, vector unsigned int __b, vector bool int __c) +{ + return (__a & ~(vector unsigned int)__c) | (__b & (vector unsigned int)__c); +} + +static vector bool int __ATTRS_o_ai +vec_sel(vector bool int __a, vector bool int __b, vector unsigned int __c) +{ + return (__a & ~(vector bool int)__c) | (__b & (vector bool int)__c); +} + +static vector bool int __ATTRS_o_ai +vec_sel(vector bool int __a, vector bool int __b, vector bool int __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector float __ATTRS_o_ai +vec_sel(vector float __a, vector float __b, vector unsigned int __c) +{ + vector int __res = ((vector int)__a & ~(vector int)__c) + | ((vector int)__b & (vector int)__c); /* float operands and the mask are all cast to vector int for the select */ + return (vector float)__res; /* the selected bits are cast back to vector float */ +} + +static vector float __ATTRS_o_ai +vec_sel(vector float __a, vector float __b, vector bool int __c) +{ + vector int __res = ((vector int)__a & ~(vector int)__c) + | ((vector int)__b & (vector int)__c); + return (vector float)__res; +} + +/* vec_vsel: same overload set and bodies as vec_sel, written out directly rather than by calling vec_sel. */ + +static vector signed char __ATTRS_o_ai +vec_vsel(vector signed char __a, vector signed char __b, vector unsigned char __c) +{ + return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c); +} + +static vector signed char __ATTRS_o_ai +vec_vsel(vector signed char __a, vector signed char __b, vector bool char __c) +{ + return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsel(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsel(vector unsigned char __a, vector unsigned char __b, vector bool char __c) +{ + return 
(__a & ~(vector unsigned char)__c) | (__b & (vector unsigned char)__c); +} + +static vector bool char __ATTRS_o_ai +vec_vsel(vector bool char __a, vector bool char __b, vector unsigned char __c) +{ + return (__a & ~(vector bool char)__c) | (__b & (vector bool char)__c); /* the unsigned mask is cast to the bool operand type before it is applied */ +} + +static vector bool char __ATTRS_o_ai +vec_vsel(vector bool char __a, vector bool char __b, vector bool char __c) +{ + return (__a & ~__c) | (__b & __c); /* mask and operands share a type, so no cast is needed */ +} + +static vector short __ATTRS_o_ai +vec_vsel(vector short __a, vector short __b, vector unsigned short __c) +{ + return (__a & ~(vector short)__c) | (__b & (vector short)__c); +} + +static vector short __ATTRS_o_ai +vec_vsel(vector short __a, vector short __b, vector bool short __c) +{ + return (__a & ~(vector short)__c) | (__b & (vector short)__c); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsel(vector unsigned short __a, + vector unsigned short __b, + vector unsigned short __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsel(vector unsigned short __a, vector unsigned short __b, vector bool short __c) +{ + return (__a & ~(vector unsigned short)__c) | (__b & (vector unsigned short)__c); +} + +static vector bool short __ATTRS_o_ai +vec_vsel(vector bool short __a, vector bool short __b, vector unsigned short __c) +{ + return (__a & ~(vector bool short)__c) | (__b & (vector bool short)__c); +} + +static vector bool short __ATTRS_o_ai +vec_vsel(vector bool short __a, vector bool short __b, vector bool short __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector int __ATTRS_o_ai +vec_vsel(vector int __a, vector int __b, vector unsigned int __c) +{ + return (__a & ~(vector int)__c) | (__b & (vector int)__c); +} + +static vector int __ATTRS_o_ai +vec_vsel(vector int __a, vector int __b, vector bool int __c) +{ + return (__a & ~(vector int)__c) | (__b & (vector int)__c); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsel(vector unsigned int __a, vector unsigned int __b, 
vector unsigned int __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsel(vector unsigned int __a, vector unsigned int __b, vector bool int __c) +{ + return (__a & ~(vector unsigned int)__c) | (__b & (vector unsigned int)__c); +} + +static vector bool int __ATTRS_o_ai +vec_vsel(vector bool int __a, vector bool int __b, vector unsigned int __c) +{ + return (__a & ~(vector bool int)__c) | (__b & (vector bool int)__c); +} + +static vector bool int __ATTRS_o_ai +vec_vsel(vector bool int __a, vector bool int __b, vector bool int __c) +{ + return (__a & ~__c) | (__b & __c); +} + +static vector float __ATTRS_o_ai +vec_vsel(vector float __a, vector float __b, vector unsigned int __c) +{ + vector int __res = ((vector int)__a & ~(vector int)__c) + | ((vector int)__b & (vector int)__c); + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vsel(vector float __a, vector float __b, vector bool int __c) +{ + vector int __res = ((vector int)__a & ~(vector int)__c) + | ((vector int)__b & (vector int)__c); + return (vector float)__res; +} + +/* vec_sl */ + +static vector signed char __ATTRS_o_ai +vec_sl(vector signed char __a, vector unsigned char __b) +{ + return __a << (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_sl(vector unsigned char __a, vector unsigned char __b) +{ + return __a << __b; +} + +static vector short __ATTRS_o_ai +vec_sl(vector short __a, vector unsigned short __b) +{ + return __a << (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_sl(vector unsigned short __a, vector unsigned short __b) +{ + return __a << __b; +} + +static vector int __ATTRS_o_ai +vec_sl(vector int __a, vector unsigned int __b) +{ + return __a << (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_sl(vector unsigned int __a, vector unsigned int __b) +{ + return __a << __b; +} + +/* vec_vslb */ + +#define __builtin_altivec_vslb vec_vslb + +static vector signed 
char __ATTRS_o_ai +vec_vslb(vector signed char __a, vector unsigned char __b) +{ + return vec_sl(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vslb(vector unsigned char __a, vector unsigned char __b) +{ + return vec_sl(__a, __b); +} + +/* vec_vslh: both 16-bit-element overloads forward to vec_sl. */ + +#define __builtin_altivec_vslh vec_vslh /* the builtin name expands to the vec_vslh overloads below */ + +static vector short __ATTRS_o_ai +vec_vslh(vector short __a, vector unsigned short __b) +{ + return vec_sl(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vslh(vector unsigned short __a, vector unsigned short __b) +{ + return vec_sl(__a, __b); +} + +/* vec_vslw: both 32-bit-element overloads forward to vec_sl. */ + +#define __builtin_altivec_vslw vec_vslw /* the builtin name expands to the vec_vslw overloads below */ + +static vector int __ATTRS_o_ai +vec_vslw(vector int __a, vector unsigned int __b) +{ + return vec_sl(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vslw(vector unsigned int __a, vector unsigned int __b) +{ + return vec_sl(__a, __b); +} + +/* vec_sld: implemented with vec_perm, passing the consecutive byte indices __c, __c+1, ... __c+15 as the permute control vector. __c is neither masked nor range-checked here. NOTE(review): callers are presumably expected to pass a value in 0..15 - confirm. */ + +#define __builtin_altivec_vsldoi_4si vec_sld /* the builtin name expands to the vec_sld overload set below */ + +static vector signed char __ATTRS_o_ai +vec_sld(vector signed char __a, vector signed char __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector unsigned char __ATTRS_o_ai +vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector short __ATTRS_o_ai +vec_sld(vector short __a, vector short __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector unsigned short __ATTRS_o_ai +vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned char __c) +{ + return vec_perm(__a, __b, 
(vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector pixel __ATTRS_o_ai +vec_sld(vector pixel __a, vector pixel __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector int __ATTRS_o_ai +vec_sld(vector int __a, vector int __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector unsigned int __ATTRS_o_ai +vec_sld(vector unsigned int __a, vector unsigned int __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector float __ATTRS_o_ai +vec_sld(vector float __a, vector float __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +/* vec_vsldoi: same overload set and bodies as vec_sld (vec_perm with the control vector __c .. __c+15), written out directly rather than by calling vec_sld. */ + +static vector signed char __ATTRS_o_ai +vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsldoi(vector unsigned char __a, vector unsigned char __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector short __ATTRS_o_ai +vec_vsldoi(vector short __a, vector short __b, 
unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsldoi(vector unsigned short __a, vector unsigned short __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector pixel __ATTRS_o_ai +vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector int __ATTRS_o_ai +vec_vsldoi(vector int __a, vector int __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsldoi(vector unsigned int __a, vector unsigned int __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +static vector float __ATTRS_o_ai +vec_vsldoi(vector float __a, vector float __b, unsigned char __c) +{ + return vec_perm(__a, __b, (vector unsigned char) + (__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15)); +} + +/* vec_sll: every overload casts its operands to vector int where they are not already, calls __builtin_altivec_vsl, and casts the result back to the type of __a. For each type of __a there is one overload per count-vector type: unsigned char, unsigned short and unsigned int. */ + +static vector signed char __ATTRS_o_ai +vec_sll(vector signed char __a, vector unsigned char __b) +{ + return (vector signed char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector signed char __ATTRS_o_ai +vec_sll(vector signed char __a, vector unsigned short __b) +{ + return (vector signed 
char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector signed char __ATTRS_o_ai +vec_sll(vector signed char __a, vector unsigned int __b) +{ + return (vector signed char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); /* both operands are passed to the builtin as vector int; the result is cast back to the type of __a */ +} + +static vector unsigned char __ATTRS_o_ai +vec_sll(vector unsigned char __a, vector unsigned char __b) +{ + return (vector unsigned char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_sll(vector unsigned char __a, vector unsigned short __b) +{ + return (vector unsigned char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_sll(vector unsigned char __a, vector unsigned int __b) +{ + return (vector unsigned char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_sll(vector bool char __a, vector unsigned char __b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_sll(vector bool char __a, vector unsigned short __b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_sll(vector bool char __a, vector unsigned int __b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_sll(vector short __a, vector unsigned char __b) +{ + return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_sll(vector short __a, vector unsigned short __b) +{ + return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_sll(vector short __a, vector unsigned int __b) +{ + return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai 
+vec_sll(vector unsigned short __a, vector unsigned char __b) +{ + return (vector unsigned short) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_sll(vector unsigned short __a, vector unsigned short __b) +{ + return (vector unsigned short) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_sll(vector unsigned short __a, vector unsigned int __b) +{ + return (vector unsigned short) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool short __ATTRS_o_ai +vec_sll(vector bool short __a, vector unsigned char __b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool short __ATTRS_o_ai +vec_sll(vector bool short __a, vector unsigned short __b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool short __ATTRS_o_ai +vec_sll(vector bool short __a, vector unsigned int __b) +{ + return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_sll(vector pixel __a, vector unsigned char __b) +{ + return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_sll(vector pixel __a, vector unsigned short __b) +{ + return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_sll(vector pixel __a, vector unsigned int __b) +{ + return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_sll(vector int __a, vector unsigned char __b) +{ + return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); /* __a is already vector int, so only the count vector is cast */ +} + +static vector int __ATTRS_o_ai +vec_sll(vector int __a, vector unsigned short __b) +{ + return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); +} + +static vector 
int __ATTRS_o_ai +vec_sll(vector int __a, vector unsigned int __b) +{ + return (vector int)__builtin_altivec_vsl(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_sll(vector unsigned int __a, vector unsigned char __b) +{ + return (vector unsigned int) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_sll(vector unsigned int __a, vector unsigned short __b) +{ + return (vector unsigned int) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_sll(vector unsigned int __a, vector unsigned int __b) +{ + return (vector unsigned int) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_sll(vector bool int __a, vector unsigned char __b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_sll(vector bool int __a, vector unsigned short __b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_sll(vector bool int __a, vector unsigned int __b) +{ + return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +/* vec_vsl */ + +static vector signed char __ATTRS_o_ai +vec_vsl(vector signed char __a, vector unsigned char __b) +{ + return (vector signed char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector signed char __ATTRS_o_ai +vec_vsl(vector signed char __a, vector unsigned short __b) +{ + return (vector signed char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector signed char __ATTRS_o_ai +vec_vsl(vector signed char __a, vector unsigned int __b) +{ + return (vector signed char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsl(vector unsigned char __a, vector unsigned char __b) 
+{ + return (vector unsigned char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsl(vector unsigned char __a, vector unsigned short __b) +{ + return (vector unsigned char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsl(vector unsigned char __a, vector unsigned int __b) +{ + return (vector unsigned char) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_vsl(vector bool char __a, vector unsigned char __b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_vsl(vector bool char __a, vector unsigned short __b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_vsl(vector bool char __a, vector unsigned int __b) +{ + return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_vsl(vector short __a, vector unsigned char __b) +{ + return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_vsl(vector short __a, vector unsigned short __b) +{ + return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_vsl(vector short __a, vector unsigned int __b) +{ + return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsl(vector unsigned short __a, vector unsigned char __b) +{ + return (vector unsigned short) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsl(vector unsigned short __a, vector unsigned short __b) +{ + return (vector unsigned short) + __builtin_altivec_vsl((vector int)__a, (vector int)__b); +} + 
+/* vec_vsl (continued): each overload reinterprets both operands as
+   vector int, passes them to __builtin_altivec_vsl and casts the result
+   back to the type of __a. */
+
+static vector unsigned short __ATTRS_o_ai
+vec_vsl(vector unsigned short __a, vector unsigned int __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned short)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector bool short __ATTRS_o_ai
+vec_vsl(vector bool short __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector bool short)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector bool short __ATTRS_o_ai
+vec_vsl(vector bool short __a, vector unsigned short __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector bool short)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector bool short __ATTRS_o_ai
+vec_vsl(vector bool short __a, vector unsigned int __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector bool short)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector pixel __ATTRS_o_ai
+vec_vsl(vector pixel __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector pixel)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector pixel __ATTRS_o_ai
+vec_vsl(vector pixel __a, vector unsigned short __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector pixel)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector pixel __ATTRS_o_ai
+vec_vsl(vector pixel __a, vector unsigned int __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector pixel)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector int __ATTRS_o_ai
+vec_vsl(vector int __a, vector unsigned char __b) {
+  vector int __count = (vector int)__b;
+  return (vector int)__builtin_altivec_vsl(__a, __count);
+}
+
+static vector int __ATTRS_o_ai
+vec_vsl(vector int __a, vector unsigned short __b) {
+  vector int __count = (vector int)__b;
+  return (vector int)__builtin_altivec_vsl(__a, __count);
+}
+
+static vector int __ATTRS_o_ai
+vec_vsl(vector int __a, vector unsigned int __b) {
+  vector int __count = (vector int)__b;
+  return (vector int)__builtin_altivec_vsl(__a, __count);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vsl(vector unsigned int __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned int)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vsl(vector unsigned int __a, vector unsigned short __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned int)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vsl(vector unsigned int __a, vector unsigned int __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned int)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector bool int __ATTRS_o_ai
+vec_vsl(vector bool int __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector bool int)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector bool int __ATTRS_o_ai
+vec_vsl(vector bool int __a, vector unsigned short __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector bool int)__builtin_altivec_vsl(__bits, __count);
+}
+
+static vector bool int __ATTRS_o_ai
+vec_vsl(vector bool int __a, vector unsigned int __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector bool int)__builtin_altivec_vsl(__bits, __count);
+}
+
+/* vec_slo */
+
+/* Each overload reinterprets both operands as vector int, passes them to
+   __builtin_altivec_vslo and casts the result back to the type of __a. */
+
+static vector signed char __ATTRS_o_ai
+vec_slo(vector signed char __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector signed char)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector signed char __ATTRS_o_ai
+vec_slo(vector signed char __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector signed char)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_slo(vector unsigned char __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned char)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_slo(vector unsigned char __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned char)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector short __ATTRS_o_ai
+vec_slo(vector short __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector short)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector short __ATTRS_o_ai
+vec_slo(vector short __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector short)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_slo(vector unsigned short __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned short)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_slo(vector unsigned short __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned short)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector pixel __ATTRS_o_ai
+vec_slo(vector pixel __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector pixel)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector pixel __ATTRS_o_ai
+vec_slo(vector pixel __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector pixel)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector int __ATTRS_o_ai
+vec_slo(vector int __a, vector signed char __b) {
+  vector int __count = (vector int)__b;
+  return (vector int)__builtin_altivec_vslo(__a, __count);
+}
+
+static vector int __ATTRS_o_ai
+vec_slo(vector int __a, vector unsigned char __b) {
+  vector int __count = (vector int)__b;
+  return (vector int)__builtin_altivec_vslo(__a, __count);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_slo(vector unsigned int __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned int)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_slo(vector unsigned int __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned int)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector float __ATTRS_o_ai
+vec_slo(vector float __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector float)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector float __ATTRS_o_ai
+vec_slo(vector float __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector float)__builtin_altivec_vslo(__bits, __count);
+}
+
+/* vec_vslo */
+
+/* Same bodies as the vec_slo overloads above, exposed under the vec_vslo
+   name: cast to vector int, call __builtin_altivec_vslo, cast back. */
+
+static vector signed char __ATTRS_o_ai
+vec_vslo(vector signed char __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector signed char)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector signed char __ATTRS_o_ai
+vec_vslo(vector signed char __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector signed char)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_vslo(vector unsigned char __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned char)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_vslo(vector unsigned char __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned char)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector short __ATTRS_o_ai
+vec_vslo(vector short __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector short)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector short __ATTRS_o_ai
+vec_vslo(vector short __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector short)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_vslo(vector unsigned short __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned short)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_vslo(vector unsigned short __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector unsigned short)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector pixel __ATTRS_o_ai
+vec_vslo(vector pixel __a, vector signed char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector pixel)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector pixel __ATTRS_o_ai
+vec_vslo(vector pixel __a, vector unsigned char __b) {
+  vector int __bits = (vector int)__a;
+  vector int __count = (vector int)__b;
+  return (vector pixel)__builtin_altivec_vslo(__bits, __count);
+}
+
+static vector int __ATTRS_o_ai
+vec_vslo(vector int __a, vector signed char __b) {
+  vector int __count = (vector int)__b;
+  return (vector int)__builtin_altivec_vslo(__a, __count);
+}
+
+/* vec_vslo (continued): operands are reinterpreted as vector int for
+   __builtin_altivec_vslo and the result is cast back to the type of __a. */
+
+static vector int __ATTRS_o_ai
+vec_vslo(vector int __a, vector unsigned char __b)
+{
+  return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vslo(vector unsigned int __a, vector signed char __b)
+{
+  return (vector unsigned int)
+           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vslo(vector unsigned int __a, vector unsigned char __b)
+{
+  return (vector unsigned int)
+           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+}
+
+static vector float __ATTRS_o_ai
+vec_vslo(vector float __a, vector signed char __b)
+{
+  return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
+}
+
+static vector float __ATTRS_o_ai
+vec_vslo(vector float __a, vector unsigned char __b)
+{
+  return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
+}
+
+/* vec_splat */
+
+/* Every overload calls vec_perm(__a, __a, control).  For byte vectors the
+   control vector is (vector unsigned char)(__b).  For 2- and 4-byte element
+   types __b is first scaled by the element size, and the control vector
+   lists the consecutive byte indices of that element, repeated to fill all
+   16 bytes.  Locals use reserved (__-prefixed) names so that a user macro
+   cannot collide with them. */
+
+static vector signed char __ATTRS_o_ai
+vec_splat(vector signed char __a, unsigned char __b)
+{
+  return vec_perm(__a, __a, (vector unsigned char)(__b));
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_splat(vector unsigned char __a, unsigned char __b)
+{
+  return vec_perm(__a, __a, (vector unsigned char)(__b));
+}
+
+static vector bool char __ATTRS_o_ai
+vec_splat(vector bool char __a, unsigned char __b)
+{
+  return vec_perm(__a, __a, (vector unsigned char)(__b));
+}
+
+static vector short __ATTRS_o_ai
+vec_splat(vector short __a, unsigned char __b)
+{
+  __b *= 2;
+  unsigned char __b1=__b+1;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b, __b1, __b, __b1, __b, __b1,
+     __b, __b1, __b, __b1, __b, __b1, __b, __b1));
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_splat(vector unsigned short __a, unsigned char __b)
+{
+  __b *= 2;
+  unsigned char __b1=__b+1;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b, __b1, __b, __b1, __b, __b1,
+     __b, __b1, __b, __b1, __b, __b1, __b, __b1));
+}
+
+static vector bool short __ATTRS_o_ai
+vec_splat(vector bool short __a, unsigned char __b)
+{
+  __b *= 2;
+  unsigned char __b1=__b+1;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b, __b1, __b, __b1, __b, __b1,
+     __b, __b1, __b, __b1, __b, __b1, __b, __b1));
+}
+
+static vector pixel __ATTRS_o_ai
+vec_splat(vector pixel __a, unsigned char __b)
+{
+  __b *= 2;
+  unsigned char __b1=__b+1;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b, __b1, __b, __b1, __b, __b1,
+     __b, __b1, __b, __b1, __b, __b1, __b, __b1));
+}
+
+static vector int __ATTRS_o_ai
+vec_splat(vector int __a, unsigned char __b)
+{
+  __b *= 4;
+  unsigned char __b1=__b+1, __b2=__b+2, __b3=__b+3;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b2, __b3, __b, __b1, __b2, __b3,
+     __b, __b1, __b2, __b3, __b, __b1, __b2, __b3));
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_splat(vector unsigned int __a, unsigned char __b)
+{
+  __b *= 4;
+  unsigned char __b1=__b+1, __b2=__b+2, __b3=__b+3;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b2, __b3, __b, __b1, __b2, __b3,
+     __b, __b1, __b2, __b3, __b, __b1, __b2, __b3));
+}
+
+static vector bool int __ATTRS_o_ai
+vec_splat(vector bool int __a, unsigned char __b)
+{
+  __b *= 4;
+  unsigned char __b1=__b+1, __b2=__b+2, __b3=__b+3;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b2, __b3, __b, __b1, __b2, __b3,
+     __b, __b1, __b2, __b3, __b, __b1, __b2, __b3));
+}
+
+static vector float __ATTRS_o_ai
+vec_splat(vector float __a, unsigned char __b)
+{
+  __b *= 4;
+  unsigned char __b1=__b+1, __b2=__b+2, __b3=__b+3;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b2, __b3, __b, __b1, __b2, __b3,
+     __b, __b1, __b2, __b3, __b, __b1, __b2, __b3));
+}
+
+/* vec_vspltb */
+
+/* The macro maps the builtin's name onto the overload set defined here. */
+#define __builtin_altivec_vspltb vec_vspltb
+
+static vector signed char __ATTRS_o_ai
+vec_vspltb(vector signed char __a, unsigned char __b)
+{
+  return vec_perm(__a, __a, (vector unsigned char)(__b));
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_vspltb(vector unsigned char __a, unsigned char __b)
+{
+  return vec_perm(__a, __a, (vector unsigned char)(__b));
+}
+
+static vector bool char __ATTRS_o_ai
+vec_vspltb(vector bool char __a, unsigned char __b)
+{
+  return vec_perm(__a, __a, (vector unsigned char)(__b));
+}
+
+/* vec_vsplth */
+
+/* The macro maps the builtin's name onto the overload set defined here. */
+#define __builtin_altivec_vsplth vec_vsplth
+
+static vector short __ATTRS_o_ai
+vec_vsplth(vector short __a, unsigned char __b)
+{
+  __b *= 2;
+  unsigned char __b1=__b+1;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b, __b1, __b, __b1, __b, __b1,
+     __b, __b1, __b, __b1, __b, __b1, __b, __b1));
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_vsplth(vector unsigned short __a, unsigned char __b)
+{
+  __b *= 2;
+  unsigned char __b1=__b+1;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b, __b1, __b, __b1, __b, __b1,
+     __b, __b1, __b, __b1, __b, __b1, __b, __b1));
+}
+
+static vector bool short __ATTRS_o_ai
+vec_vsplth(vector bool short __a, unsigned char __b)
+{
+  __b *= 2;
+  unsigned char __b1=__b+1;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b, __b1, __b, __b1, __b, __b1,
+     __b, __b1, __b, __b1, __b, __b1, __b, __b1));
+}
+
+static vector pixel __ATTRS_o_ai
+vec_vsplth(vector pixel __a, unsigned char __b)
+{
+  __b *= 2;
+  unsigned char __b1=__b+1;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b, __b1, __b, __b1, __b, __b1,
+     __b, __b1, __b, __b1, __b, __b1, __b, __b1));
+}
+
+/* vec_vspltw */
+
+/* The macro maps the builtin's name onto the overload set defined here. */
+#define __builtin_altivec_vspltw vec_vspltw
+
+static vector int __ATTRS_o_ai
+vec_vspltw(vector int __a, unsigned char __b)
+{
+  __b *= 4;
+  unsigned char __b1=__b+1, __b2=__b+2, __b3=__b+3;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b2, __b3, __b, __b1, __b2, __b3,
+     __b, __b1, __b2, __b3, __b, __b1, __b2, __b3));
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vspltw(vector unsigned int __a, unsigned char __b)
+{
+  __b *= 4;
+  unsigned char __b1=__b+1, __b2=__b+2, __b3=__b+3;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b2, __b3, __b, __b1, __b2, __b3,
+     __b, __b1, __b2, __b3, __b, __b1, __b2, __b3));
+}
+
+static vector bool int __ATTRS_o_ai
+vec_vspltw(vector bool int __a, unsigned char __b)
+{
+  __b *= 4;
+  unsigned char __b1=__b+1, __b2=__b+2, __b3=__b+3;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b2, __b3, __b, __b1, __b2, __b3,
+     __b, __b1, __b2, __b3, __b, __b1, __b2, __b3));
+}
+
+static vector float __ATTRS_o_ai
+vec_vspltw(vector float __a, unsigned char __b)
+{
+  __b *= 4;
+  unsigned char __b1=__b+1, __b2=__b+2, __b3=__b+3;
+  return vec_perm(__a, __a, (vector unsigned char)
+    (__b, __b1, __b2, __b3, __b, __b1, __b2, __b3,
+     __b, __b1, __b2, __b3, __b, __b1, __b2, __b3));
+}
+
+/* vec_splat_s8 */
+
+/* The splat-immediate helpers below return the scalar argument converted
+   with a vector cast of the form (vector T)(__a). */
+
+#define __builtin_altivec_vspltisb vec_splat_s8
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector signed char __ATTRS_o_ai
+vec_splat_s8(signed char __a)
+{
+  return (vector signed char)(__a);
+}
+
+/* vec_vspltisb */
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector signed char __ATTRS_o_ai
+vec_vspltisb(signed char __a)
+{
+  return (vector signed char)(__a);
+}
+
+/* vec_splat_s16 */
+
+#define __builtin_altivec_vspltish vec_splat_s16
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector short __ATTRS_o_ai
+vec_splat_s16(signed char __a)
+{
+  return (vector short)(__a);
+}
+
+/* vec_vspltish */
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector short __ATTRS_o_ai
+vec_vspltish(signed char __a)
+{
+  return (vector short)(__a);
+}
+
+/* vec_splat_s32 */
+
+#define __builtin_altivec_vspltisw vec_splat_s32
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector int __ATTRS_o_ai
+vec_splat_s32(signed char __a)
+{
+  return (vector int)(__a);
+}
+
+/* vec_vspltisw */
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector int __ATTRS_o_ai
+vec_vspltisw(signed char __a)
+{
+  return (vector int)(__a);
+}
+
+/* vec_splat_u8 */
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector unsigned char __ATTRS_o_ai
+vec_splat_u8(unsigned char __a)
+{
+  return (vector unsigned char)(__a);
+}
+
+/* vec_splat_u16 */
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector unsigned short __ATTRS_o_ai
+vec_splat_u16(signed char __a)
+{
+  return (vector unsigned short)(__a);
+}
+
+/* vec_splat_u32 */
+
+// FIXME: parameter should be treated as 5-bit signed literal
+static vector unsigned int __ATTRS_o_ai
+vec_splat_u32(signed char __a)
+{
+  return (vector unsigned int)(__a);
+}
+
+/* vec_sr */
+
+/* vec_sr is a logical right shift: vacated high-order bits are zero-filled
+   whatever the signedness of __a (vec_sra is the arithmetic variant).  A
+   plain >> on a signed vector would propagate the sign bit, so the signed
+   overloads shift the unsigned reinterpretation of __a and cast the result
+   back. */
+
+static vector signed char __ATTRS_o_ai
+vec_sr(vector signed char __a, vector unsigned char __b)
+{
+  vector unsigned char __res = (vector unsigned char)__a >> __b;
+  return (vector signed char)__res;
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_sr(vector unsigned char __a, vector unsigned char __b)
+{
+  return __a >> __b;
+}
+
+static vector short __ATTRS_o_ai
+vec_sr(vector short __a, vector unsigned short __b)
+{
+  vector unsigned short __res = (vector unsigned short)__a >> __b;
+  return (vector short)__res;
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_sr(vector unsigned short __a, vector unsigned short __b)
+{
+  return __a >> __b;
+}
+
+static vector int __ATTRS_o_ai
+vec_sr(vector int __a, vector unsigned int __b)
+{
+  vector unsigned int __res = (vector unsigned int)__a >> __b;
+  return (vector int)__res;
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_sr(vector unsigned int __a, vector unsigned int __b)
+{
+  return __a >> __b;
+}
+
+/* vec_vsrb */
+
+/* Logical shift, same contract as vec_sr.  The macro maps the builtin's
+   name onto this overload set, so these bodies must not call
+   __builtin_altivec_vsrb themselves. */
+#define __builtin_altivec_vsrb vec_vsrb
+
+static vector signed char __ATTRS_o_ai
+vec_vsrb(vector signed char __a, vector unsigned char __b)
+{
+  vector unsigned char __res = (vector unsigned char)__a >> __b;
+  return (vector signed char)__res;
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_vsrb(vector unsigned char __a, vector unsigned char __b)
+{
+  return __a >> __b;
+}
+
+/* vec_vsrh */
+
+/* Logical shift, same contract as vec_sr; see vec_vsrb for the macro. */
+#define __builtin_altivec_vsrh vec_vsrh
+
+static vector short __ATTRS_o_ai
+vec_vsrh(vector short __a, vector unsigned short __b)
+{
+  vector unsigned short __res = (vector unsigned short)__a >> __b;
+  return (vector short)__res;
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_vsrh(vector unsigned short __a, vector unsigned short __b)
+{
+  return __a >> __b;
+}
+
+/* vec_vsrw */
+
+/* Logical shift, same contract as vec_sr; see vec_vsrb for the macro. */
+#define __builtin_altivec_vsrw vec_vsrw
+
+static vector int __ATTRS_o_ai
+vec_vsrw(vector int __a, vector unsigned int __b)
+{
+  vector unsigned int __res = (vector unsigned int)__a >> __b;
+  return (vector int)__res;
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vsrw(vector unsigned int __a, vector unsigned int __b)
+{
+  return __a >> __b;
+}
+
+/* vec_sra */
+
+/* Wrappers over __builtin_altivec_vsrab / vsrah / vsraw.  Unsigned inputs
+   are reinterpreted as the signed vector type the builtin is called with,
+   and the result is cast back to the type of __a. */
+
+static vector signed char __ATTRS_o_ai
+vec_sra(vector signed char __a, vector unsigned char __b)
+{
+  return (vector signed char)__builtin_altivec_vsrab((vector char)__a, __b);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_sra(vector unsigned char __a, vector unsigned char __b)
+{
+  return (vector unsigned char)__builtin_altivec_vsrab((vector char)__a, __b);
+}
+
+static vector short __ATTRS_o_ai
+vec_sra(vector short __a, vector unsigned short __b)
+{
+  return __builtin_altivec_vsrah(__a, (vector unsigned short)__b);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_sra(vector unsigned short __a, vector unsigned short __b)
+{
+  return (vector unsigned short)__builtin_altivec_vsrah((vector short)__a, __b);
+}
+
+static vector int __ATTRS_o_ai
+vec_sra(vector int __a, vector unsigned int __b)
+{
+  return __builtin_altivec_vsraw(__a, __b);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_sra(vector unsigned int __a, vector unsigned int __b)
+{
+  return (vector unsigned int)__builtin_altivec_vsraw((vector int)__a, __b);
+}
+
+/* vec_vsrab */
+
+static vector signed char __ATTRS_o_ai
+vec_vsrab(vector signed char __a, vector unsigned char __b)
+{
+  return (vector signed char)__builtin_altivec_vsrab((vector char)__a, __b);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_vsrab(vector unsigned char __a, vector unsigned char __b)
+{
+  return (vector unsigned char)__builtin_altivec_vsrab((vector char)__a, __b);
+}
+
+/* vec_vsrah */
+
+static vector short __ATTRS_o_ai
+vec_vsrah(vector short __a, vector unsigned short __b)
+{
+  return __builtin_altivec_vsrah(__a, (vector unsigned short)__b);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_vsrah(vector unsigned short __a, vector unsigned short __b)
+{
+  return (vector unsigned short)__builtin_altivec_vsrah((vector short)__a, __b);
+}
+
+/* vec_vsraw */
+
+static vector int __ATTRS_o_ai
+vec_vsraw(vector int __a, vector unsigned int __b)
+{
+  return __builtin_altivec_vsraw(__a, __b);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vsraw(vector unsigned int __a, vector unsigned int __b)
+{
+  return (vector unsigned int)__builtin_altivec_vsraw((vector int)__a, __b);
+}
+
+/* vec_srl */
+
+/* Each overload reinterprets both operands as vector int, passes them to
+   __builtin_altivec_vsr and casts the result back to the type of __a. */
+
+static vector signed char __ATTRS_o_ai
+vec_srl(vector signed char __a, vector unsigned char __b)
+{
+  return (vector signed char)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector signed char __ATTRS_o_ai
+vec_srl(vector signed char __a, vector unsigned short __b)
+{
+  return (vector signed char)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector signed char __ATTRS_o_ai
+vec_srl(vector signed char __a, vector unsigned int __b)
+{
+  return (vector signed char)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_srl(vector unsigned char __a, vector unsigned char __b)
+{
+  return (vector unsigned char)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_srl(vector unsigned char __a, vector unsigned short __b)
+{
+  return (vector unsigned char)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector unsigned char __ATTRS_o_ai
+vec_srl(vector unsigned char __a, vector unsigned int __b)
+{
+  return (vector unsigned char)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector bool char __ATTRS_o_ai
+vec_srl(vector bool char __a, vector unsigned char __b)
+{
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector bool char __ATTRS_o_ai
+vec_srl(vector bool char __a, vector unsigned short __b)
+{
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector bool char __ATTRS_o_ai
+vec_srl(vector bool char __a, vector unsigned int __b)
+{
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector short __ATTRS_o_ai
+vec_srl(vector short __a, vector unsigned char __b)
+{
+  return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector short __ATTRS_o_ai
+vec_srl(vector short __a, vector unsigned short __b)
+{
+  return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector short __ATTRS_o_ai
+vec_srl(vector short __a, vector unsigned int __b)
+{
+  return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_srl(vector unsigned short __a, vector unsigned char __b)
+{
+  return (vector unsigned short)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_srl(vector unsigned short __a, vector unsigned short __b)
+{
+  return (vector unsigned short)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector unsigned short __ATTRS_o_ai
+vec_srl(vector unsigned short __a, vector unsigned int __b)
+{
+  return (vector unsigned short)
+           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector bool short __ATTRS_o_ai
+vec_srl(vector bool short __a, vector unsigned char __b)
+{
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector bool short __ATTRS_o_ai
+vec_srl(vector bool short __a, vector unsigned short __b)
+{
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector bool short __ATTRS_o_ai
+vec_srl(vector bool short __a, vector unsigned int __b)
+{
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector pixel __ATTRS_o_ai
+vec_srl(vector pixel __a, vector unsigned char __b)
+{
+  return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+}
+
+static vector pixel __ATTRS_o_ai +vec_srl(vector pixel __a, vector unsigned short __b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_srl(vector pixel __a, vector unsigned int __b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_srl(vector int __a, vector unsigned char __b) +{ + return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_srl(vector int __a, vector unsigned short __b) +{ + return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_srl(vector int __a, vector unsigned int __b) +{ + return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_srl(vector unsigned int __a, vector unsigned char __b) +{ + return (vector unsigned int) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_srl(vector unsigned int __a, vector unsigned short __b) +{ + return (vector unsigned int) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_srl(vector unsigned int __a, vector unsigned int __b) +{ + return (vector unsigned int) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_srl(vector bool int __a, vector unsigned char __b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_srl(vector bool int __a, vector unsigned short __b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_srl(vector bool int __a, vector unsigned int __b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +/* vec_vsr */ + +static 
vector signed char __ATTRS_o_ai +vec_vsr(vector signed char __a, vector unsigned char __b) +{ + return (vector signed char) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector signed char __ATTRS_o_ai +vec_vsr(vector signed char __a, vector unsigned short __b) +{ + return (vector signed char) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector signed char __ATTRS_o_ai +vec_vsr(vector signed char __a, vector unsigned int __b) +{ + return (vector signed char) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsr(vector unsigned char __a, vector unsigned char __b) +{ + return (vector unsigned char) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsr(vector unsigned char __a, vector unsigned short __b) +{ + return (vector unsigned char) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsr(vector unsigned char __a, vector unsigned int __b) +{ + return (vector unsigned char) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_vsr(vector bool char __a, vector unsigned char __b) +{ + return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_vsr(vector bool char __a, vector unsigned short __b) +{ + return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool char __ATTRS_o_ai +vec_vsr(vector bool char __a, vector unsigned int __b) +{ + return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_vsr(vector short __a, vector unsigned char __b) +{ + return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_vsr(vector short __a, vector unsigned 
short __b) +{ + return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_vsr(vector short __a, vector unsigned int __b) +{ + return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsr(vector unsigned short __a, vector unsigned char __b) +{ + return (vector unsigned short) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsr(vector unsigned short __a, vector unsigned short __b) +{ + return (vector unsigned short) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsr(vector unsigned short __a, vector unsigned int __b) +{ + return (vector unsigned short) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool short __ATTRS_o_ai +vec_vsr(vector bool short __a, vector unsigned char __b) +{ + return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool short __ATTRS_o_ai +vec_vsr(vector bool short __a, vector unsigned short __b) +{ + return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool short __ATTRS_o_ai +vec_vsr(vector bool short __a, vector unsigned int __b) +{ + return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_vsr(vector pixel __a, vector unsigned char __b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_vsr(vector pixel __a, vector unsigned short __b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_vsr(vector pixel __a, vector unsigned int __b) +{ + return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector 
int __ATTRS_o_ai +vec_vsr(vector int __a, vector unsigned char __b) +{ + return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_vsr(vector int __a, vector unsigned short __b) +{ + return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_vsr(vector int __a, vector unsigned int __b) +{ + return (vector int)__builtin_altivec_vsr(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsr(vector unsigned int __a, vector unsigned char __b) +{ + return (vector unsigned int) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsr(vector unsigned int __a, vector unsigned short __b) +{ + return (vector unsigned int) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsr(vector unsigned int __a, vector unsigned int __b) +{ + return (vector unsigned int) + __builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_vsr(vector bool int __a, vector unsigned char __b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_vsr(vector bool int __a, vector unsigned short __b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +static vector bool int __ATTRS_o_ai +vec_vsr(vector bool int __a, vector unsigned int __b) +{ + return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b); +} + +/* vec_sro */ + +static vector signed char __ATTRS_o_ai +vec_sro(vector signed char __a, vector signed char __b) +{ + return (vector signed char) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector signed char __ATTRS_o_ai +vec_sro(vector signed char __a, vector unsigned char __b) +{ + return (vector signed char) + __builtin_altivec_vsro((vector int)__a, (vector 
int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_sro(vector unsigned char __a, vector signed char __b) +{ + return (vector unsigned char) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_sro(vector unsigned char __a, vector unsigned char __b) +{ + return (vector unsigned char) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_sro(vector short __a, vector signed char __b) +{ + return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_sro(vector short __a, vector unsigned char __b) +{ + return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_sro(vector unsigned short __a, vector signed char __b) +{ + return (vector unsigned short) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_sro(vector unsigned short __a, vector unsigned char __b) +{ + return (vector unsigned short) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_sro(vector pixel __a, vector signed char __b) +{ + return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_sro(vector pixel __a, vector unsigned char __b) +{ + return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_sro(vector int __a, vector signed char __b) +{ + return (vector int)__builtin_altivec_vsro(__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_sro(vector int __a, vector unsigned char __b) +{ + return (vector int)__builtin_altivec_vsro(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_sro(vector unsigned int __a, vector signed char __b) +{ + return (vector unsigned int) + 
__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_sro(vector unsigned int __a, vector unsigned char __b) +{ + return (vector unsigned int) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector float __ATTRS_o_ai +vec_sro(vector float __a, vector signed char __b) +{ + return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector float __ATTRS_o_ai +vec_sro(vector float __a, vector unsigned char __b) +{ + return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +/* vec_vsro */ + +static vector signed char __ATTRS_o_ai +vec_vsro(vector signed char __a, vector signed char __b) +{ + return (vector signed char) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector signed char __ATTRS_o_ai +vec_vsro(vector signed char __a, vector unsigned char __b) +{ + return (vector signed char) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsro(vector unsigned char __a, vector signed char __b) +{ + return (vector unsigned char) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsro(vector unsigned char __a, vector unsigned char __b) +{ + return (vector unsigned char) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_vsro(vector short __a, vector signed char __b) +{ + return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector short __ATTRS_o_ai +vec_vsro(vector short __a, vector unsigned char __b) +{ + return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsro(vector unsigned short __a, vector signed char __b) +{ + return (vector unsigned short) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static 
vector unsigned short __ATTRS_o_ai +vec_vsro(vector unsigned short __a, vector unsigned char __b) +{ + return (vector unsigned short) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_vsro(vector pixel __a, vector signed char __b) +{ + return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector pixel __ATTRS_o_ai +vec_vsro(vector pixel __a, vector unsigned char __b) +{ + return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_vsro(vector int __a, vector signed char __b) +{ + return (vector int)__builtin_altivec_vsro(__a, (vector int)__b); +} + +static vector int __ATTRS_o_ai +vec_vsro(vector int __a, vector unsigned char __b) +{ + return (vector int)__builtin_altivec_vsro(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsro(vector unsigned int __a, vector signed char __b) +{ + return (vector unsigned int) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsro(vector unsigned int __a, vector unsigned char __b) +{ + return (vector unsigned int) + __builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector float __ATTRS_o_ai +vec_vsro(vector float __a, vector signed char __b) +{ + return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +static vector float __ATTRS_o_ai +vec_vsro(vector float __a, vector unsigned char __b) +{ + return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b); +} + +/* vec_st */ + +static void __ATTRS_o_ai +vec_st(vector signed char __a, int __b, vector signed char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector signed char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector unsigned char __a, int 
__b, vector unsigned char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector unsigned char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool char __a, int __b, vector bool char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector short __a, int __b, vector short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector short __a, int __b, short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector unsigned short __a, int __b, vector unsigned short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector unsigned short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool short __a, int __b, short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool short __a, int __b, vector bool short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector pixel __a, int __b, short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector pixel __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void 
__ATTRS_o_ai +vec_st(vector pixel __a, int __b, vector pixel *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector int __a, int __b, vector int *__c) +{ + __builtin_altivec_stvx(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector int __a, int __b, int *__c) +{ + __builtin_altivec_stvx(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector unsigned int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool int __a, int __b, int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector bool int __a, int __b, vector bool int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector float __a, int __b, vector float *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_st(vector float __a, int __b, float *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +/* vec_stvx */ + +static void __ATTRS_o_ai +vec_stvx(vector signed char __a, int __b, vector signed char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector signed char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector unsigned char __a, int __b, vector unsigned char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector unsigned char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, 
__c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool char __a, int __b, vector bool char *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector short __a, int __b, vector short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector short __a, int __b, short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector unsigned short __a, int __b, vector unsigned short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector unsigned short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool short __a, int __b, short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool short __a, int __b, vector bool short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector pixel __a, int __b, short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector pixel __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector pixel __a, int __b, vector pixel *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector int __a, int __b, vector int *__c) +{ 
+ __builtin_altivec_stvx(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector int __a, int __b, int *__c) +{ + __builtin_altivec_stvx(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector unsigned int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool int __a, int __b, int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector bool int __a, int __b, vector bool int *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector float __a, int __b, vector float *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvx(vector float __a, int __b, float *__c) +{ + __builtin_altivec_stvx((vector int)__a, __b, __c); +} + +/* vec_ste */ + +static void __ATTRS_o_ai +vec_ste(vector signed char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvebx((vector char)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector unsigned char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvebx((vector char)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvebx((vector char)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvebx((vector char)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector short __a, int __b, short *__c) +{ + __builtin_altivec_stvehx(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector unsigned short __a, int __b, 
unsigned short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool short __a, int __b, short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector pixel __a, int __b, short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector pixel __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector int __a, int __b, int *__c) +{ + __builtin_altivec_stvewx(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector unsigned int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvewx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool int __a, int __b, int *__c) +{ + __builtin_altivec_stvewx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector bool int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvewx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_ste(vector float __a, int __b, float *__c) +{ + __builtin_altivec_stvewx((vector int)__a, __b, __c); +} + +/* vec_stvebx */ + +static void __ATTRS_o_ai +vec_stvebx(vector signed char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvebx((vector char)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvebx(vector unsigned char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvebx((vector char)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvebx(vector bool char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvebx((vector char)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvebx(vector bool char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvebx((vector char)__a, __b, __c); +} + 
+/* vec_stvehx */ + +static void __ATTRS_o_ai +vec_stvehx(vector short __a, int __b, short *__c) +{ + __builtin_altivec_stvehx(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvehx(vector unsigned short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvehx(vector bool short __a, int __b, short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvehx(vector bool short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvehx(vector pixel __a, int __b, short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvehx(vector pixel __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvehx((vector short)__a, __b, __c); +} + +/* vec_stvewx */ + +static void __ATTRS_o_ai +vec_stvewx(vector int __a, int __b, int *__c) +{ + __builtin_altivec_stvewx(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvewx(vector unsigned int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvewx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvewx(vector bool int __a, int __b, int *__c) +{ + __builtin_altivec_stvewx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvewx(vector bool int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvewx((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvewx(vector float __a, int __b, float *__c) +{ + __builtin_altivec_stvewx((vector int)__a, __b, __c); +} + +/* vec_stl */ + +static void __ATTRS_o_ai +vec_stl(vector signed char __a, int __b, vector signed char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector signed char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector 
unsigned char __a, int __b, vector unsigned char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector unsigned char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool char __a, int __b, vector bool char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector short __a, int __b, vector short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector short __a, int __b, short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector unsigned short __a, int __b, vector unsigned short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector unsigned short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool short __a, int __b, short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool short __a, int __b, vector bool short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector pixel __a, int __b, short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector pixel __a, int __b, unsigned short *__c) +{ + 
__builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector pixel __a, int __b, vector pixel *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector int __a, int __b, vector int *__c) +{ + __builtin_altivec_stvxl(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector int __a, int __b, int *__c) +{ + __builtin_altivec_stvxl(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector unsigned int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool int __a, int __b, int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector bool int __a, int __b, vector bool int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector float __a, int __b, vector float *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stl(vector float __a, int __b, float *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +/* vec_stvxl */ + +static void __ATTRS_o_ai +vec_stvxl(vector signed char __a, int __b, vector signed char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector signed char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector unsigned char __a, int __b, vector unsigned char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector 
unsigned char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool char __a, int __b, signed char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool char __a, int __b, unsigned char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool char __a, int __b, vector bool char *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector short __a, int __b, vector short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector short __a, int __b, short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector unsigned short __a, int __b, vector unsigned short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector unsigned short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool short __a, int __b, short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool short __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool short __a, int __b, vector bool short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector pixel __a, int __b, short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector pixel __a, int __b, unsigned short *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector pixel __a, int __b, vector pixel *__c) +{ + 
__builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector int __a, int __b, vector int *__c) +{ + __builtin_altivec_stvxl(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector int __a, int __b, int *__c) +{ + __builtin_altivec_stvxl(__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector unsigned int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool int __a, int __b, int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool int __a, int __b, unsigned int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector bool int __a, int __b, vector bool int *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector float __a, int __b, vector float *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +static void __ATTRS_o_ai +vec_stvxl(vector float __a, int __b, float *__c) +{ + __builtin_altivec_stvxl((vector int)__a, __b, __c); +} + +/* vec_sub */ + +static vector signed char __ATTRS_o_ai +vec_sub(vector signed char __a, vector signed char __b) +{ + return __a - __b; +} + +static vector signed char __ATTRS_o_ai +vec_sub(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a - __b; +} + +static vector signed char __ATTRS_o_ai +vec_sub(vector signed char __a, vector bool char __b) +{ + return __a - (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_sub(vector unsigned char __a, vector unsigned char __b) +{ + return __a - __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_sub(vector bool char __a, vector unsigned char __b) 
+{ + return (vector unsigned char)__a - __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_sub(vector unsigned char __a, vector bool char __b) +{ + return __a - (vector unsigned char)__b; +} + +static vector short __ATTRS_o_ai +vec_sub(vector short __a, vector short __b) +{ + return __a - __b; +} + +static vector short __ATTRS_o_ai +vec_sub(vector bool short __a, vector short __b) +{ + return (vector short)__a - __b; +} + +static vector short __ATTRS_o_ai +vec_sub(vector short __a, vector bool short __b) +{ + return __a - (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_sub(vector unsigned short __a, vector unsigned short __b) +{ + return __a - __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_sub(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a - __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_sub(vector unsigned short __a, vector bool short __b) +{ + return __a - (vector unsigned short)__b; +} + +static vector int __ATTRS_o_ai +vec_sub(vector int __a, vector int __b) +{ + return __a - __b; +} + +static vector int __ATTRS_o_ai +vec_sub(vector bool int __a, vector int __b) +{ + return (vector int)__a - __b; +} + +static vector int __ATTRS_o_ai +vec_sub(vector int __a, vector bool int __b) +{ + return __a - (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_sub(vector unsigned int __a, vector unsigned int __b) +{ + return __a - __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_sub(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a - __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_sub(vector unsigned int __a, vector bool int __b) +{ + return __a - (vector unsigned int)__b; +} + +static vector float __ATTRS_o_ai +vec_sub(vector float __a, vector float __b) +{ + return __a - __b; +} + +/* vec_vsububm */ + +#define __builtin_altivec_vsububm vec_vsububm + +static vector signed char __ATTRS_o_ai +vec_vsububm(vector signed 
char __a, vector signed char __b) +{ + return __a - __b; +} + +static vector signed char __ATTRS_o_ai +vec_vsububm(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a - __b; +} + +static vector signed char __ATTRS_o_ai +vec_vsububm(vector signed char __a, vector bool char __b) +{ + return __a - (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vsububm(vector unsigned char __a, vector unsigned char __b) +{ + return __a - __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vsububm(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a - __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vsububm(vector unsigned char __a, vector bool char __b) +{ + return __a - (vector unsigned char)__b; +} + +/* vec_vsubuhm */ + +#define __builtin_altivec_vsubuhm vec_vsubuhm + +static vector short __ATTRS_o_ai +vec_vsubuhm(vector short __a, vector short __b) +{ + return __a - __b; +} + +static vector short __ATTRS_o_ai +vec_vsubuhm(vector bool short __a, vector short __b) +{ + return (vector short)__a - __b; +} + +static vector short __ATTRS_o_ai +vec_vsubuhm(vector short __a, vector bool short __b) +{ + return __a - (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vsubuhm(vector unsigned short __a, vector unsigned short __b) +{ + return __a - __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vsubuhm(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a - __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vsubuhm(vector unsigned short __a, vector bool short __b) +{ + return __a - (vector unsigned short)__b; +} + +/* vec_vsubuwm */ + +#define __builtin_altivec_vsubuwm vec_vsubuwm + +static vector int __ATTRS_o_ai +vec_vsubuwm(vector int __a, vector int __b) +{ + return __a - __b; +} + +static vector int __ATTRS_o_ai +vec_vsubuwm(vector bool int __a, vector int __b) +{ + return (vector int)__a - __b; +} + +static 
vector int __ATTRS_o_ai +vec_vsubuwm(vector int __a, vector bool int __b) +{ + return __a - (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vsubuwm(vector unsigned int __a, vector unsigned int __b) +{ + return __a - __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vsubuwm(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a - __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vsubuwm(vector unsigned int __a, vector bool int __b) +{ + return __a - (vector unsigned int)__b; +} + +/* vec_vsubfp */ + +#define __builtin_altivec_vsubfp vec_vsubfp + +static vector float __attribute__((__always_inline__)) +vec_vsubfp(vector float __a, vector float __b) +{ + return __a - __b; +} + +/* vec_subc */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_subc(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vsubcuw(__a, __b); +} + +/* vec_vsubcuw */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vsubcuw(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vsubcuw(__a, __b); +} + +/* vec_subs */ + +static vector signed char __ATTRS_o_ai +vec_subs(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vsubsbs(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_subs(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vsubsbs((vector signed char)__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_subs(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vsubsbs(__a, (vector signed char)__b); +} + +static vector unsigned char __ATTRS_o_ai +vec_subs(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vsububs(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_subs(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vsububs((vector unsigned char)__a, __b); +} + +static vector 
unsigned char __ATTRS_o_ai +vec_subs(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vsububs(__a, (vector unsigned char)__b); +} +/* vec_subs, halfword and word overloads: signed short -> vsubshs, unsigned short -> vsubuhs, signed int -> vsubsws, unsigned int -> vsubuws; vector bool operands are cast to the other operand's type. */ +static vector short __ATTRS_o_ai +vec_subs(vector short __a, vector short __b) +{ + return __builtin_altivec_vsubshs(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_subs(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vsubshs((vector short)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_subs(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vsubshs(__a, (vector short)__b); +} + +static vector unsigned short __ATTRS_o_ai +vec_subs(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vsubuhs(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_subs(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vsubuhs((vector unsigned short)__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_subs(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vsubuhs(__a, (vector unsigned short)__b); +} + +static vector int __ATTRS_o_ai +vec_subs(vector int __a, vector int __b) +{ + return __builtin_altivec_vsubsws(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_subs(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vsubsws((vector int)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_subs(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vsubsws(__a, (vector int)__b); +} + +static vector unsigned int __ATTRS_o_ai +vec_subs(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vsubuws(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_subs(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vsubuws((vector unsigned int)__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_subs(vector unsigned int __a, vector bool int __b) +{ + return 
__builtin_altivec_vsubuws(__a, (vector unsigned int)__b); +} + +/* vec_vsubsbs: vector signed char overloads forwarding to __builtin_altivec_vsubsbs; a vector bool char operand is cast to vector signed char. */ + +static vector signed char __ATTRS_o_ai +vec_vsubsbs(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vsubsbs(__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_vsubsbs(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vsubsbs((vector signed char)__a, __b); +} + +static vector signed char __ATTRS_o_ai +vec_vsubsbs(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vsubsbs(__a, (vector signed char)__b); +} + +/* vec_vsububs: vector unsigned char overloads forwarding to __builtin_altivec_vsububs; a vector bool char operand is cast to vector unsigned char. */ + +static vector unsigned char __ATTRS_o_ai +vec_vsububs(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vsububs(__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsububs(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vsububs((vector unsigned char)__a, __b); +} + +static vector unsigned char __ATTRS_o_ai +vec_vsububs(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vsububs(__a, (vector unsigned char)__b); +} + +/* vec_vsubshs: vector short overloads forwarding to __builtin_altivec_vsubshs; a vector bool short operand is cast to vector short. */ + +static vector short __ATTRS_o_ai +vec_vsubshs(vector short __a, vector short __b) +{ + return __builtin_altivec_vsubshs(__a, __b); +} + +static vector short __ATTRS_o_ai +vec_vsubshs(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vsubshs((vector short)__a, __b); +} + +static vector short __ATTRS_o_ai +vec_vsubshs(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vsubshs(__a, (vector short)__b); +} + +/* vec_vsubuhs: vector unsigned short overloads forwarding to __builtin_altivec_vsubuhs; a vector bool short operand is cast to vector unsigned short. */ + +static vector unsigned short __ATTRS_o_ai +vec_vsubuhs(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vsubuhs(__a, __b); +} + +static vector unsigned short __ATTRS_o_ai +vec_vsubuhs(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vsubuhs((vector unsigned short)__a, __b); +} + +static vector unsigned short 
__ATTRS_o_ai +vec_vsubuhs(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vsubuhs(__a, (vector unsigned short)__b); +} + +/* vec_vsubsws: vector int overloads forwarding to __builtin_altivec_vsubsws; a vector bool int operand is cast to vector int. */ + +static vector int __ATTRS_o_ai +vec_vsubsws(vector int __a, vector int __b) +{ + return __builtin_altivec_vsubsws(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_vsubsws(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vsubsws((vector int)__a, __b); +} + +static vector int __ATTRS_o_ai +vec_vsubsws(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vsubsws(__a, (vector int)__b); +} + +/* vec_vsubuws: vector unsigned int overloads forwarding to __builtin_altivec_vsubuws; a vector bool int operand is cast to vector unsigned int. */ + +static vector unsigned int __ATTRS_o_ai +vec_vsubuws(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vsubuws(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsubuws(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vsubuws((vector unsigned int)__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_vsubuws(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vsubuws(__a, (vector unsigned int)__b); +} + +/* vec_sum4s: the type of __a picks the builtin -- signed char -> vsum4sbs, unsigned char -> vsum4ubs, signed short -> vsum4shs; __b and the result are word vectors. */ + +static vector int __ATTRS_o_ai +vec_sum4s(vector signed char __a, vector int __b) +{ + return __builtin_altivec_vsum4sbs(__a, __b); +} + +static vector unsigned int __ATTRS_o_ai +vec_sum4s(vector unsigned char __a, vector unsigned int __b) +{ + return __builtin_altivec_vsum4ubs(__a, __b); +} + +static vector int __ATTRS_o_ai +vec_sum4s(vector signed short __a, vector int __b) +{ + return __builtin_altivec_vsum4shs(__a, __b); +} + +/* vec_vsum4sbs: non-overloaded wrapper around __builtin_altivec_vsum4sbs. */ + +static vector int __attribute__((__always_inline__)) +vec_vsum4sbs(vector signed char __a, vector int __b) +{ + return __builtin_altivec_vsum4sbs(__a, __b); +} + +/* vec_vsum4ubs: non-overloaded wrapper around __builtin_altivec_vsum4ubs. */ + +static vector unsigned int __attribute__((__always_inline__)) +vec_vsum4ubs(vector unsigned char __a, vector unsigned int __b) +{ + return __builtin_altivec_vsum4ubs(__a, __b); +} + +/* vec_vsum4shs */ + 
+static vector int __attribute__((__always_inline__)) +vec_vsum4shs(vector signed short __a, vector int __b) +{ + return __builtin_altivec_vsum4shs(__a, __b); +} + +/* vec_sum2s: forwards both vector int operands to __builtin_altivec_vsum2sws. */ + +static vector signed int __attribute__((__always_inline__)) +vec_sum2s(vector int __a, vector int __b) +{ + return __builtin_altivec_vsum2sws(__a, __b); +} + +/* vec_vsum2sws: same body as vec_sum2s under the instruction-specific name. */ + +static vector signed int __attribute__((__always_inline__)) +vec_vsum2sws(vector int __a, vector int __b) +{ + return __builtin_altivec_vsum2sws(__a, __b); +} + +/* vec_sums: forwards both vector signed int operands to __builtin_altivec_vsumsws. */ + +static vector signed int __attribute__((__always_inline__)) +vec_sums(vector signed int __a, vector signed int __b) +{ + return __builtin_altivec_vsumsws(__a, __b); +} + +/* vec_vsumsws: same body as vec_sums under the instruction-specific name. */ + +static vector signed int __attribute__((__always_inline__)) +vec_vsumsws(vector signed int __a, vector signed int __b) +{ + return __builtin_altivec_vsumsws(__a, __b); +} + +/* vec_trunc: forwards the vector float operand to __builtin_altivec_vrfiz. */ + +static vector float __attribute__((__always_inline__)) +vec_trunc(vector float __a) +{ + return __builtin_altivec_vrfiz(__a); +} + +/* vec_vrfiz: same body as vec_trunc under the instruction-specific name. */ + +static vector float __attribute__((__always_inline__)) +vec_vrfiz(vector float __a) +{ + return __builtin_altivec_vrfiz(__a); +} + +/* vec_unpackh: char elements go through vupkhsb, short elements through vupkhsh, and vector pixel through the pixel-unpack builtin vupkhpx; vector bool inputs are cast to the signed type for the call and the result is cast back to the wider bool type. */ + +static vector short __ATTRS_o_ai +vec_unpackh(vector signed char __a) +{ + return __builtin_altivec_vupkhsb((vector char)__a); +} + +static vector bool short __ATTRS_o_ai +vec_unpackh(vector bool char __a) +{ + return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a); +} + +static vector int __ATTRS_o_ai +vec_unpackh(vector short __a) +{ + return __builtin_altivec_vupkhsh(__a); +} + +static vector bool int __ATTRS_o_ai +vec_unpackh(vector bool short __a) +{ + return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a); +} +/* A pixel must be expanded with vupkhpx; vupkhsh would merely sign-extend each halfword instead of splitting it into channels. */ +static vector unsigned int __ATTRS_o_ai +vec_unpackh(vector pixel __a) +{ + return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a); +} + +/* vec_vupkhsb */ + +static vector short __ATTRS_o_ai 
+vec_vupkhsb(vector signed char __a) +{ + return __builtin_altivec_vupkhsb((vector char)__a); +} + +static vector bool short __ATTRS_o_ai +vec_vupkhsb(vector bool char __a) +{ + return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a); +} + +/* vec_vupkhsh: vector short and vector bool short go through __builtin_altivec_vupkhsh; the vector pixel overload goes through the pixel-unpack builtin vupkhpx, because vupkhsh would only sign-extend each halfword. */ + +static vector int __ATTRS_o_ai +vec_vupkhsh(vector short __a) +{ + return __builtin_altivec_vupkhsh(__a); +} + +static vector bool int __ATTRS_o_ai +vec_vupkhsh(vector bool short __a) +{ + return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a); +} + +static vector unsigned int __ATTRS_o_ai +vec_vupkhsh(vector pixel __a) +{ + return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a); +} + +/* vec_unpackl: char elements go through vupklsb, short elements through vupklsh, and vector pixel through the pixel-unpack builtin vupklpx; vector bool inputs are cast to the signed type for the call and the result is cast back to the wider bool type. */ + +static vector short __ATTRS_o_ai +vec_unpackl(vector signed char __a) +{ + return __builtin_altivec_vupklsb((vector char)__a); +} + +static vector bool short __ATTRS_o_ai +vec_unpackl(vector bool char __a) +{ + return (vector bool short)__builtin_altivec_vupklsb((vector char)__a); +} + +static vector int __ATTRS_o_ai +vec_unpackl(vector short __a) +{ + return __builtin_altivec_vupklsh(__a); +} + +static vector bool int __ATTRS_o_ai +vec_unpackl(vector bool short __a) +{ + return (vector bool int)__builtin_altivec_vupklsh((vector short)__a); +} +/* A pixel must be expanded with vupklpx; vupklsh would merely sign-extend each halfword instead of splitting it into channels. */ +static vector unsigned int __ATTRS_o_ai +vec_unpackl(vector pixel __a) +{ + return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a); +} + +/* vec_vupklsb: vector signed char / vector bool char overloads forwarding to __builtin_altivec_vupklsb. */ + +static vector short __ATTRS_o_ai +vec_vupklsb(vector signed char __a) +{ + return __builtin_altivec_vupklsb((vector char)__a); +} + +static vector bool short __ATTRS_o_ai +vec_vupklsb(vector bool char __a) +{ + return (vector bool short)__builtin_altivec_vupklsb((vector char)__a); +} + +/* vec_vupklsh */ + +static vector int __ATTRS_o_ai +vec_vupklsh(vector short __a) +{ + return __builtin_altivec_vupklsh(__a); +} + +static vector bool int __ATTRS_o_ai +vec_vupklsh(vector bool short __a) +{ + return (vector bool 
int)__builtin_altivec_vupklsh((vector short)__a); +} +/* Pixel overload of vec_vupklsh: uses the pixel-unpack builtin vupklpx, because vupklsh would only sign-extend each halfword. */ +static vector unsigned int __ATTRS_o_ai +vec_vupklsh(vector pixel __a) +{ + return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a); +} + +/* vec_xor: lane-wise `__a ^ __b`; when exactly one operand is a vector bool it is cast to the other operand's type, and bool ^ bool stays bool. The #define below makes the token __builtin_altivec_vxor expand to this function name. */ + +#define __builtin_altivec_vxor vec_xor + +static vector signed char __ATTRS_o_ai +vec_xor(vector signed char __a, vector signed char __b) +{ + return __a ^ __b; +} + +static vector signed char __ATTRS_o_ai +vec_xor(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a ^ __b; +} + +static vector signed char __ATTRS_o_ai +vec_xor(vector signed char __a, vector bool char __b) +{ + return __a ^ (vector signed char)__b; +} + +static vector unsigned char __ATTRS_o_ai +vec_xor(vector unsigned char __a, vector unsigned char __b) +{ + return __a ^ __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_xor(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a ^ __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_xor(vector unsigned char __a, vector bool char __b) +{ + return __a ^ (vector unsigned char)__b; +} + +static vector bool char __ATTRS_o_ai +vec_xor(vector bool char __a, vector bool char __b) +{ + return __a ^ __b; +} + +static vector short __ATTRS_o_ai +vec_xor(vector short __a, vector short __b) +{ + return __a ^ __b; +} + +static vector short __ATTRS_o_ai +vec_xor(vector bool short __a, vector short __b) +{ + return (vector short)__a ^ __b; +} + +static vector short __ATTRS_o_ai +vec_xor(vector short __a, vector bool short __b) +{ + return __a ^ (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_xor(vector unsigned short __a, vector unsigned short __b) +{ + return __a ^ __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_xor(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a ^ __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_xor(vector unsigned short __a, vector bool short __b) +{ + return __a ^ 
(vector unsigned short)__b; +} +/* vec_xor, remaining overloads: integer lanes use `^` directly; the vector float overloads cast both operands to vector unsigned int, XOR them, and cast the result back to vector float. */ +static vector bool short __ATTRS_o_ai +vec_xor(vector bool short __a, vector bool short __b) +{ + return __a ^ __b; +} + +static vector int __ATTRS_o_ai +vec_xor(vector int __a, vector int __b) +{ + return __a ^ __b; +} + +static vector int __ATTRS_o_ai +vec_xor(vector bool int __a, vector int __b) +{ + return (vector int)__a ^ __b; +} + +static vector int __ATTRS_o_ai +vec_xor(vector int __a, vector bool int __b) +{ + return __a ^ (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_xor(vector unsigned int __a, vector unsigned int __b) +{ + return __a ^ __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_xor(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a ^ __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_xor(vector unsigned int __a, vector bool int __b) +{ + return __a ^ (vector unsigned int)__b; +} + +static vector bool int __ATTRS_o_ai +vec_xor(vector bool int __a, vector bool int __b) +{ + return __a ^ __b; +} + +static vector float __ATTRS_o_ai +vec_xor(vector float __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_xor(vector bool int __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_xor(vector float __a, vector bool int __b) +{ + vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; + return (vector float)__res; +} + +/* vec_vxor: overload set with the same bodies as vec_xor (lane-wise `^`, vector bool operands cast to the other operand's type). */ + +static vector signed char __ATTRS_o_ai +vec_vxor(vector signed char __a, vector signed char __b) +{ + return __a ^ __b; +} + +static vector signed char __ATTRS_o_ai +vec_vxor(vector bool char __a, vector signed char __b) +{ + return (vector signed char)__a ^ __b; +} + +static vector signed char __ATTRS_o_ai +vec_vxor(vector signed char __a, 
vector bool char __b) +{ + return __a ^ (vector signed char)__b; +} +/* vec_vxor, char / short / int overloads: lane-wise `^`; a lone vector bool operand is cast to the other operand's type, and bool ^ bool returns the bool type. */ +static vector unsigned char __ATTRS_o_ai +vec_vxor(vector unsigned char __a, vector unsigned char __b) +{ + return __a ^ __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vxor(vector bool char __a, vector unsigned char __b) +{ + return (vector unsigned char)__a ^ __b; +} + +static vector unsigned char __ATTRS_o_ai +vec_vxor(vector unsigned char __a, vector bool char __b) +{ + return __a ^ (vector unsigned char)__b; +} + +static vector bool char __ATTRS_o_ai +vec_vxor(vector bool char __a, vector bool char __b) +{ + return __a ^ __b; +} + +static vector short __ATTRS_o_ai +vec_vxor(vector short __a, vector short __b) +{ + return __a ^ __b; +} + +static vector short __ATTRS_o_ai +vec_vxor(vector bool short __a, vector short __b) +{ + return (vector short)__a ^ __b; +} + +static vector short __ATTRS_o_ai +vec_vxor(vector short __a, vector bool short __b) +{ + return __a ^ (vector short)__b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vxor(vector unsigned short __a, vector unsigned short __b) +{ + return __a ^ __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vxor(vector bool short __a, vector unsigned short __b) +{ + return (vector unsigned short)__a ^ __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_vxor(vector unsigned short __a, vector bool short __b) +{ + return __a ^ (vector unsigned short)__b; +} + +static vector bool short __ATTRS_o_ai +vec_vxor(vector bool short __a, vector bool short __b) +{ + return __a ^ __b; +} + +static vector int __ATTRS_o_ai +vec_vxor(vector int __a, vector int __b) +{ + return __a ^ __b; +} + +static vector int __ATTRS_o_ai +vec_vxor(vector bool int __a, vector int __b) +{ + return (vector int)__a ^ __b; +} + +static vector int __ATTRS_o_ai +vec_vxor(vector int __a, vector bool int __b) +{ + return __a ^ (vector int)__b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vxor(vector unsigned int __a, vector unsigned int __b) +{ + 
return __a ^ __b; +} +/* vec_vxor, remaining overloads: the vector float variants cast both operands to vector unsigned int, XOR them, and cast the result back to vector float. */ +static vector unsigned int __ATTRS_o_ai +vec_vxor(vector bool int __a, vector unsigned int __b) +{ + return (vector unsigned int)__a ^ __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_vxor(vector unsigned int __a, vector bool int __b) +{ + return __a ^ (vector unsigned int)__b; +} + +static vector bool int __ATTRS_o_ai +vec_vxor(vector bool int __a, vector bool int __b) +{ + return __a ^ __b; +} + +static vector float __ATTRS_o_ai +vec_vxor(vector float __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vxor(vector bool int __a, vector float __b) +{ + vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; + return (vector float)__res; +} + +static vector float __ATTRS_o_ai +vec_vxor(vector float __a, vector bool int __b) +{ + vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b; + return (vector float)__res; +} + +/* ------------------------ extensions for CBEA ----------------------------- */ + +/* vec_extract: returns element __b of vector __a via subscripting (`__a[__b]`); no range check on __b is performed here. */ + +static signed char __ATTRS_o_ai +vec_extract(vector signed char __a, int __b) +{ + return __a[__b]; +} + +static unsigned char __ATTRS_o_ai +vec_extract(vector unsigned char __a, int __b) +{ + return __a[__b]; +} + +static short __ATTRS_o_ai +vec_extract(vector short __a, int __b) +{ + return __a[__b]; +} + +static unsigned short __ATTRS_o_ai +vec_extract(vector unsigned short __a, int __b) +{ + return __a[__b]; +} + +static int __ATTRS_o_ai +vec_extract(vector int __a, int __b) +{ + return __a[__b]; +} + +static unsigned int __ATTRS_o_ai +vec_extract(vector unsigned int __a, int __b) +{ + return __a[__b]; +} + +static float __ATTRS_o_ai +vec_extract(vector float __a, int __b) +{ + return __a[__b]; +} + +/* vec_insert: stores scalar __a into element __c of the by-value vector parameter __b and returns that modified copy; no range check on __c is performed here. */ + +static vector signed char __ATTRS_o_ai +vec_insert(signed char __a, vector signed char __b, int __c) +{ + __b[__c] = 
__a; + return __b; +} +/* vec_insert, remaining overloads: each writes scalar __a into element __c of the by-value vector __b and returns the modified copy. */ +static vector unsigned char __ATTRS_o_ai +vec_insert(unsigned char __a, vector unsigned char __b, int __c) +{ + __b[__c] = __a; + return __b; +} + +static vector short __ATTRS_o_ai +vec_insert(short __a, vector short __b, int __c) +{ + __b[__c] = __a; + return __b; +} + +static vector unsigned short __ATTRS_o_ai +vec_insert(unsigned short __a, vector unsigned short __b, int __c) +{ + __b[__c] = __a; + return __b; +} + +static vector int __ATTRS_o_ai +vec_insert(int __a, vector int __b, int __c) +{ + __b[__c] = __a; + return __b; +} + +static vector unsigned int __ATTRS_o_ai +vec_insert(unsigned int __a, vector unsigned int __b, int __c) +{ + __b[__c] = __a; + return __b; +} + +static vector float __ATTRS_o_ai +vec_insert(float __a, vector float __b, int __c) +{ + __b[__c] = __a; + return __b; +} + +/* vec_lvlx: built from existing intrinsics as vec_perm(vec_ld(__a, __b), zero vector, vec_lvsl(__a, __b)); overloads taking a pointer to a vector type cast __b to unsigned char * for the vec_lvsl call. NOTE(review): presumably emulates the Cell "load vector left indexed" operation -- confirm against the CBEA language extension spec. */ + +static vector signed char __ATTRS_o_ai +vec_lvlx(int __a, const signed char *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector signed char)(0), + vec_lvsl(__a, __b)); +} + +static vector signed char __ATTRS_o_ai +vec_lvlx(int __a, const vector signed char *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector signed char)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvlx(int __a, const unsigned char *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector unsigned char)(0), + vec_lvsl(__a, __b)); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvlx(int __a, const vector unsigned char *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector unsigned char)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool char __ATTRS_o_ai +vec_lvlx(int __a, const vector bool char *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector bool char)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector short __ATTRS_o_ai +vec_lvlx(int __a, const short *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector short)(0), + vec_lvsl(__a, __b)); +} + +static vector short 
__ATTRS_o_ai +vec_lvlx(int __a, const vector short *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector short)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} +/* vec_lvlx, further overloads: same vec_perm(vec_ld(...), zero vector, vec_lvsl(...)) body for each element type; the cast to unsigned char * in the vector-pointer overloads drops the const qualifier of __b. */ +static vector unsigned short __ATTRS_o_ai +vec_lvlx(int __a, const unsigned short *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector unsigned short)(0), + vec_lvsl(__a, __b)); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvlx(int __a, const vector unsigned short *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector unsigned short)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool short __ATTRS_o_ai +vec_lvlx(int __a, const vector bool short *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector bool short)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector pixel __ATTRS_o_ai +vec_lvlx(int __a, const vector pixel *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector pixel)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector int __ATTRS_o_ai +vec_lvlx(int __a, const int *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector int)(0), + vec_lvsl(__a, __b)); +} + +static vector int __ATTRS_o_ai +vec_lvlx(int __a, const vector int *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector int)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvlx(int __a, const unsigned int *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector unsigned int)(0), + vec_lvsl(__a, __b)); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvlx(int __a, const vector unsigned int *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector unsigned int)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool int __ATTRS_o_ai +vec_lvlx(int __a, const vector bool int *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector bool int)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector float __ATTRS_o_ai +vec_lvlx(int __a, const float *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector float)(0), + vec_lvsl(__a, __b)); 
+} + +static vector float __ATTRS_o_ai +vec_lvlx(int __a, const vector float *__b) +{ + return vec_perm(vec_ld(__a, __b), + (vector float)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +/* vec_lvlxl: same shape as vec_lvlx but the load goes through vec_ldl instead of vec_ld: vec_perm(vec_ldl(__a, __b), zero vector, vec_lvsl(__a, __b)). */ + +static vector signed char __ATTRS_o_ai +vec_lvlxl(int __a, const signed char *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector signed char)(0), + vec_lvsl(__a, __b)); +} + +static vector signed char __ATTRS_o_ai +vec_lvlxl(int __a, const vector signed char *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector signed char)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvlxl(int __a, const unsigned char *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector unsigned char)(0), + vec_lvsl(__a, __b)); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvlxl(int __a, const vector unsigned char *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector unsigned char)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool char __ATTRS_o_ai +vec_lvlxl(int __a, const vector bool char *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector bool char)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector short __ATTRS_o_ai +vec_lvlxl(int __a, const short *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector short)(0), + vec_lvsl(__a, __b)); +} + +static vector short __ATTRS_o_ai +vec_lvlxl(int __a, const vector short *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector short)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvlxl(int __a, const unsigned short *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector unsigned short)(0), + vec_lvsl(__a, __b)); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvlxl(int __a, const vector unsigned short *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector unsigned short)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool short __ATTRS_o_ai +vec_lvlxl(int __a, const vector 
bool short *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector bool short)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} +/* vec_lvlxl, further overloads: vec_perm(vec_ldl(__a, __b), zero vector, vec_lvsl(__a, __b)); every pointer parameter is const-qualified, including the vector float one, so read-only buffers are accepted uniformly. */ +static vector pixel __ATTRS_o_ai +vec_lvlxl(int __a, const vector pixel *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector pixel)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector int __ATTRS_o_ai +vec_lvlxl(int __a, const int *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector int)(0), + vec_lvsl(__a, __b)); +} + +static vector int __ATTRS_o_ai +vec_lvlxl(int __a, const vector int *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector int)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvlxl(int __a, const unsigned int *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector unsigned int)(0), + vec_lvsl(__a, __b)); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvlxl(int __a, const vector unsigned int *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector unsigned int)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool int __ATTRS_o_ai +vec_lvlxl(int __a, const vector bool int *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector bool int)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector float __ATTRS_o_ai +vec_lvlxl(int __a, const float *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector float)(0), + vec_lvsl(__a, __b)); +} + +static vector float __ATTRS_o_ai +vec_lvlxl(int __a, const vector float *__b) +{ + return vec_perm(vec_ldl(__a, __b), + (vector float)(0), + vec_lvsl(__a, (unsigned char *)__b)); +} + +/* vec_lvrx: operand order is swapped relative to vec_lvlx -- vec_perm(zero vector, vec_ld(__a, __b), vec_lvsl(__a, __b)); overloads taking a pointer to a vector type cast __b to unsigned char * for vec_lvsl. */ + +static vector signed char __ATTRS_o_ai +vec_lvrx(int __a, const signed char *__b) +{ + return vec_perm((vector signed char)(0), + vec_ld(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector signed char __ATTRS_o_ai +vec_lvrx(int __a, const vector signed char *__b) +{ + return vec_perm((vector signed char)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned char 
__ATTRS_o_ai +vec_lvrx(int __a, const unsigned char *__b) +{ + return vec_perm((vector unsigned char)(0), + vec_ld(__a, __b), + vec_lvsl(__a, __b)); +} +/* vec_lvrx, further overloads: vec_perm(zero vector, vec_ld(__a, __b), vec_lvsl(__a, __b)) for each element type; vector-pointer overloads cast __b to unsigned char * for vec_lvsl. */ +static vector unsigned char __ATTRS_o_ai +vec_lvrx(int __a, const vector unsigned char *__b) +{ + return vec_perm((vector unsigned char)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool char __ATTRS_o_ai +vec_lvrx(int __a, const vector bool char *__b) +{ + return vec_perm((vector bool char)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector short __ATTRS_o_ai +vec_lvrx(int __a, const short *__b) +{ + return vec_perm((vector short)(0), + vec_ld(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector short __ATTRS_o_ai +vec_lvrx(int __a, const vector short *__b) +{ + return vec_perm((vector short)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvrx(int __a, const unsigned short *__b) +{ + return vec_perm((vector unsigned short)(0), + vec_ld(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvrx(int __a, const vector unsigned short *__b) +{ + return vec_perm((vector unsigned short)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool short __ATTRS_o_ai +vec_lvrx(int __a, const vector bool short *__b) +{ + return vec_perm((vector bool short)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector pixel __ATTRS_o_ai +vec_lvrx(int __a, const vector pixel *__b) +{ + return vec_perm((vector pixel)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector int __ATTRS_o_ai +vec_lvrx(int __a, const int *__b) +{ + return vec_perm((vector int)(0), + vec_ld(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector int __ATTRS_o_ai +vec_lvrx(int __a, const vector int *__b) +{ + return vec_perm((vector int)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); 
+} + +static vector unsigned int __ATTRS_o_ai +vec_lvrx(int __a, const unsigned int *__b) +{ + return vec_perm((vector unsigned int)(0), + vec_ld(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvrx(int __a, const vector unsigned int *__b) +{ + return vec_perm((vector unsigned int)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool int __ATTRS_o_ai +vec_lvrx(int __a, const vector bool int *__b) +{ + return vec_perm((vector bool int)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector float __ATTRS_o_ai +vec_lvrx(int __a, const float *__b) +{ + return vec_perm((vector float)(0), + vec_ld(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector float __ATTRS_o_ai +vec_lvrx(int __a, const vector float *__b) +{ + return vec_perm((vector float)(0), + vec_ld(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +/* vec_lvrxl: same shape as vec_lvrx but the load goes through vec_ldl instead of vec_ld: vec_perm(zero vector, vec_ldl(__a, __b), vec_lvsl(__a, __b)). */ + +static vector signed char __ATTRS_o_ai +vec_lvrxl(int __a, const signed char *__b) +{ + return vec_perm((vector signed char)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector signed char __ATTRS_o_ai +vec_lvrxl(int __a, const vector signed char *__b) +{ + return vec_perm((vector signed char)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvrxl(int __a, const unsigned char *__b) +{ + return vec_perm((vector unsigned char)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector unsigned char __ATTRS_o_ai +vec_lvrxl(int __a, const vector unsigned char *__b) +{ + return vec_perm((vector unsigned char)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool char __ATTRS_o_ai +vec_lvrxl(int __a, const vector bool char *__b) +{ + return vec_perm((vector bool char)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector short __ATTRS_o_ai +vec_lvrxl(int __a, const short *__b) +{ + return 
vec_perm((vector short)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, __b)); +} +/* vec_lvrxl, further overloads: vec_perm(zero vector, vec_ldl(__a, __b), vec_lvsl(__a, __b)) for each element type; vector-pointer overloads cast __b to unsigned char * for vec_lvsl. */ +static vector short __ATTRS_o_ai +vec_lvrxl(int __a, const vector short *__b) +{ + return vec_perm((vector short)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvrxl(int __a, const unsigned short *__b) +{ + return vec_perm((vector unsigned short)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector unsigned short __ATTRS_o_ai +vec_lvrxl(int __a, const vector unsigned short *__b) +{ + return vec_perm((vector unsigned short)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool short __ATTRS_o_ai +vec_lvrxl(int __a, const vector bool short *__b) +{ + return vec_perm((vector bool short)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector pixel __ATTRS_o_ai +vec_lvrxl(int __a, const vector pixel *__b) +{ + return vec_perm((vector pixel)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector int __ATTRS_o_ai +vec_lvrxl(int __a, const int *__b) +{ + return vec_perm((vector int)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector int __ATTRS_o_ai +vec_lvrxl(int __a, const vector int *__b) +{ + return vec_perm((vector int)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvrxl(int __a, const unsigned int *__b) +{ + return vec_perm((vector unsigned int)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector unsigned int __ATTRS_o_ai +vec_lvrxl(int __a, const vector unsigned int *__b) +{ + return vec_perm((vector unsigned int)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector bool int __ATTRS_o_ai +vec_lvrxl(int __a, const vector bool int *__b) +{ + return vec_perm((vector bool int)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +static vector float __ATTRS_o_ai 
+vec_lvrxl(int __a, const float *__b) +{ + return vec_perm((vector float)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, __b)); +} + +static vector float __ATTRS_o_ai +vec_lvrxl(int __a, const vector float *__b) +{ + return vec_perm((vector float)(0), + vec_ldl(__a, __b), + vec_lvsl(__a, (unsigned char *)__b)); +} + +/* vec_stvlx: each overload reads the current contents with vec_lvrx(__b, __c), merges them with __a via vec_perm using the vec_lvsr(__b, __c) control vector, and writes the result with vec_st(..., __b, __c); vector-pointer overloads cast __c to unsigned char * for vec_lvsr. NOTE(review): `return <void call>;` in a void function relies on the compiler accepting a void expression in that position. */ + +static void __ATTRS_o_ai +vec_stvlx(vector signed char __a, int __b, signed char *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector signed char __a, int __b, vector signed char *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector unsigned char __a, int __b, unsigned char *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector unsigned char __a, int __b, vector unsigned char *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector bool char __a, int __b, vector bool char *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector short __a, int __b, short *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector short __a, int __b, vector short *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector unsigned short __a, int __b, unsigned short *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector unsigned short __a, int __b, vector unsigned short *__c) +{ + return 
vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} +/* vec_stvlx (continued): the same vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c) call for the remaining element types. */ +static void __ATTRS_o_ai +vec_stvlx(vector bool short __a, int __b, vector bool short *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector pixel __a, int __b, vector pixel *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector int __a, int __b, int *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector int __a, int __b, vector int *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector unsigned int __a, int __b, unsigned int *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector bool int __a, int __b, vector bool int *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlx(vector float __a, int __b, vector float *__c) +{ + return vec_st(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +/* vec_stvlxl: same permute as vec_stvlx (vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), but the result is passed to vec_stl instead of vec_st. */ + +static void __ATTRS_o_ai +vec_stvlxl(vector signed char __a, int __b, signed char *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector signed char __a, int __b, vector signed char *__c) +{ + return 
vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} +/* vec_stvlxl (continued): vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b, __c) for each element type; pointer-to-vector overloads cast __c to unsigned char * for vec_lvsr only. */ +static void __ATTRS_o_ai +vec_stvlxl(vector unsigned char __a, int __b, unsigned char *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector unsigned char __a, int __b, vector unsigned char *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector bool char __a, int __b, vector bool char *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector short __a, int __b, short *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector short __a, int __b, vector short *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector unsigned short __a, int __b, unsigned short *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector unsigned short __a, int __b, vector unsigned short *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector bool short __a, int __b, vector bool short *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector pixel __a, int __b, vector pixel *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector int __a, int __b, int *__c) +{ + return 
vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} +/* vec_stvlxl (continued): remaining int, unsigned int, bool int and float overloads, same vec_stl(vec_perm(...)) call. */ +static void __ATTRS_o_ai +vec_stvlxl(vector int __a, int __b, vector int *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector unsigned int __a, int __b, unsigned int *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector bool int __a, int __b, vector bool int *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvlxl(vector float __a, int __b, vector float *__c) +{ + return vec_stl(vec_perm(vec_lvrx(__b, __c), + __a, + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +/* vec_stvrx: each overload passes vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)) to vec_st together with (__b, __c); here __a is the first permute operand and the loaded data (vec_lvlx) the second. */ + +static void __ATTRS_o_ai +vec_stvrx(vector signed char __a, int __b, signed char *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector signed char __a, int __b, vector signed char *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector unsigned char __a, int __b, unsigned char *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector unsigned char __a, int __b, vector unsigned char *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector bool char __a, int __b, vector bool char *__c) +{ + return 
vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} +/* vec_stvrx (continued): vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c) for each element type; pointer-to-vector overloads cast __c to unsigned char * for vec_lvsr only. */ +static void __ATTRS_o_ai +vec_stvrx(vector short __a, int __b, short *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector short __a, int __b, vector short *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector unsigned short __a, int __b, unsigned short *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector unsigned short __a, int __b, vector unsigned short *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector bool short __a, int __b, vector bool short *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector pixel __a, int __b, vector pixel *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector int __a, int __b, int *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector int __a, int __b, vector int *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector unsigned int __a, int __b, unsigned int *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned 
char *)__c)), + __b, __c); +} +/* vec_stvrx (continued): bool int and float overloads, same vec_st(vec_perm(__a, vec_lvlx, vec_lvsr)) call. */ +static void __ATTRS_o_ai +vec_stvrx(vector bool int __a, int __b, vector bool int *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrx(vector float __a, int __b, vector float *__c) +{ + return vec_st(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +/* vec_stvrxl: same permute as vec_stvrx (__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), but the result is passed to vec_stl instead of vec_st. */ + +static void __ATTRS_o_ai +vec_stvrxl(vector signed char __a, int __b, signed char *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector signed char __a, int __b, vector signed char *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector unsigned char __a, int __b, unsigned char *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector unsigned char __a, int __b, vector unsigned char *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector bool char __a, int __b, vector bool char *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector short __a, int __b, short *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector short __a, int __b, vector short *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector unsigned short __a, int __b, unsigned short *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, 
__c)), + __b, __c); +} +/* vec_stvrxl (continued): vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b, __c) for the remaining element types. */ +static void __ATTRS_o_ai +vec_stvrxl(vector unsigned short __a, int __b, vector unsigned short *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector bool short __a, int __b, vector bool short *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector pixel __a, int __b, vector pixel *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector int __a, int __b, int *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector int __a, int __b, vector int *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector unsigned int __a, int __b, unsigned int *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, __c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector unsigned int __a, int __b, vector unsigned int *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector bool int __a, int __b, vector bool int *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +static void __ATTRS_o_ai +vec_stvrxl(vector float __a, int __b, vector float *__c) +{ + return vec_stl(vec_perm(__a, + vec_lvlx(__b, __c), + vec_lvsr(__b, (unsigned char *)__c)), + __b, __c); +} + +/* vec_promote: returns a zero-initialized vector with the scalar __a written to element __b. The index is reduced modulo the element count (& 0xf for 16 chars, & 0x7 for 8 shorts, & 0x3 for 4 ints/floats) so an out-of-range or negative __b can never index outside the vector. */ + +static vector signed char __ATTRS_o_ai +vec_promote(signed char __a, int __b) +{ + vector signed char __res = (vector signed char)(0); + __res[__b & 0xf] = __a; + return 
__res; +} + +static vector unsigned char __ATTRS_o_ai +vec_promote(unsigned char __a, int __b) +{ + vector unsigned char __res = (vector unsigned char)(0); + __res[__b & 0xf] = __a; + return __res; +} + +static vector short __ATTRS_o_ai +vec_promote(short __a, int __b) +{ + vector short __res = (vector short)(0); + __res[__b & 0x7] = __a; + return __res; +} + +static vector unsigned short __ATTRS_o_ai +vec_promote(unsigned short __a, int __b) +{ + vector unsigned short __res = (vector unsigned short)(0); + __res[__b & 0x7] = __a; + return __res; +} + +static vector int __ATTRS_o_ai +vec_promote(int __a, int __b) +{ + vector int __res = (vector int)(0); + __res[__b & 0x3] = __a; + return __res; +} + +static vector unsigned int __ATTRS_o_ai +vec_promote(unsigned int __a, int __b) +{ + vector unsigned int __res = (vector unsigned int)(0); + __res[__b & 0x3] = __a; + return __res; +} + +static vector float __ATTRS_o_ai +vec_promote(float __a, int __b) +{ + vector float __res = (vector float)(0); + __res[__b & 0x3] = __a; + return __res; +} + +/* vec_splats: returns the scalar __a converted with a parenthesized (vector T)(__a) cast; presumably this replicates __a into every element -- TODO confirm against the compiler's vector-literal rules. */ + +static vector signed char __ATTRS_o_ai +vec_splats(signed char __a) +{ + return (vector signed char)(__a); +} + +static vector unsigned char __ATTRS_o_ai +vec_splats(unsigned char __a) +{ + return (vector unsigned char)(__a); +} + +static vector short __ATTRS_o_ai +vec_splats(short __a) +{ + return (vector short)(__a); +} + +static vector unsigned short __ATTRS_o_ai +vec_splats(unsigned short __a) +{ + return (vector unsigned short)(__a); +} + +static vector int __ATTRS_o_ai +vec_splats(int __a) +{ + return (vector int)(__a); +} + +static vector unsigned int __ATTRS_o_ai +vec_splats(unsigned int __a) +{ + return (vector unsigned int)(__a); +} + +static vector float __ATTRS_o_ai +vec_splats(float __a) +{ + return (vector float)(__a); +} + +/* ----------------------------- predicates --------------------------------- */ + +/* vec_all_eq: every overload returns an int from an equality-compare predicate builtin called with __CR6_LT; operands are cast to the builtin's vector type where needed. */ + +static int __ATTRS_o_ai +vec_all_eq(vector signed char __a, vector signed char __b) +{ + return 
__builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); +} +/* vec_all_eq (continued): byte overloads cast both operands to vector char for __builtin_altivec_vcmpequb_p, halfword overloads to vector short for __builtin_altivec_vcmpequh_p; all pass __CR6_LT. */ +static int __ATTRS_o_ai +vec_all_eq(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT, __a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector unsigned short __a, vector unsigned short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector unsigned short __a, vector bool short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool short __a, vector short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); +} + +static 
int __ATTRS_o_ai +vec_all_eq(vector bool short __a, vector unsigned short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); +} +/* vec_all_eq (continued): remaining halfword/pixel overloads (vcmpequh_p), word overloads (vcmpequw_p, operands cast to vector int) and the float overload (vcmpeqfp_p), all with __CR6_LT. */ +static int __ATTRS_o_ai +vec_all_eq(vector bool short __a, vector bool short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector pixel __a, vector pixel __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, __a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_eq(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __b); +} + +/* vec_all_ge: the integer overloads call a greater-than predicate builtin with the operands swapped (__b, __a) and __CR6_EQ; overloads whose first operand is a bool vector use the unsigned compare builtin. */ + +static int __ATTRS_o_ai +vec_all_ge(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __b, __a); +} + +static int 
__ATTRS_o_ai +vec_all_ge(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, (vector signed char)__b, __a); +} +/* vec_all_ge (continued): byte and halfword overloads; each passes (__CR6_EQ, __b, __a) to the signed (vcmpgtsb_p / vcmpgtsh_p) or unsigned (vcmpgtub_p / vcmpgtuh_p) greater-than builtin, casting bool operands. */ +static int __ATTRS_o_ai +vec_all_ge(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, + (vector unsigned char)__b, + (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __b, (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, + (vector unsigned char)__b, + (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, (vector short)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, + (vector unsigned short)__b, + (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool short __a, vector unsigned short __b) +{ + return 
__builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b, (vector unsigned short)__a); +} +/* vec_all_ge (continued): last halfword overload and the word overloads pass (__CR6_EQ, __b, __a) to vcmpgtuh_p / vcmpgtsw_p / vcmpgtuw_p; the float overload instead calls vcmpgefp_p(__CR6_LT, __a, __b). */ +static int __ATTRS_o_ai +vec_all_ge(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, + (vector unsigned short)__b, + (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, (vector int)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, + (vector unsigned int)__b, + (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __b, (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, + (vector unsigned int)__b, + (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_all_ge(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_LT, __a, __b); +} + +/* vec_all_gt: every overload calls a greater-than predicate builtin with the operands in order (__a, __b) and __CR6_LT; overloads whose first operand is a bool vector use the unsigned compare builtin. */ + +static int __ATTRS_o_ai +vec_all_gt(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __a, (vector signed char)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector unsigned char __a, vector 
unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, __a, __b); +} +/* vec_all_gt (continued): byte and halfword overloads; each passes (__CR6_LT, __a, __b) to vcmpgtub_p, vcmpgtsh_p or vcmpgtuh_p, casting bool operands to the unsigned vector type. */ +static int __ATTRS_o_ai +vec_all_gt(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, __a, (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, + (vector unsigned char)__a, + (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a, __b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, + (vector unsigned char)__a, + (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a, (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, + (vector unsigned short)__a, + (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a, __b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, + (vector unsigned short)__a, + 
(vector unsigned short)__b); +} +/* vec_all_gt (continued): word overloads pass (__CR6_LT, __a, __b) to vcmpgtsw_p / vcmpgtuw_p and the float overload to vcmpgtfp_p. */ +static int __ATTRS_o_ai +vec_all_gt(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __a, (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, + (vector unsigned int)__a, + (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a, __b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, + (vector unsigned int)__a, + (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_all_gt(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT, __a, __b); +} + +/* vec_all_in: single float overload (not overloadable); returns __builtin_altivec_vcmpbfp_p(__CR6_EQ, __a, __b). */ + +static int __attribute__((__always_inline__)) +vec_all_in(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpbfp_p(__CR6_EQ, __a, __b); +} + +/* vec_all_le: the integer overloads call a greater-than predicate builtin with the operands in order (__a, __b) and __CR6_EQ; overloads whose first operand is a bool vector use the unsigned compare builtin. */ + +static int __ATTRS_o_ai +vec_all_le(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_le(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __a, (vector signed char)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector unsigned char __a, vector unsigned char __b) +{ + return 
__builtin_altivec_vcmpgtub_p(__CR6_EQ, __a, __b); +} +/* vec_all_le (continued): byte and halfword overloads; each passes (__CR6_EQ, __a, __b) to vcmpgtub_p, vcmpgtsh_p or vcmpgtuh_p, casting bool operands to the unsigned vector type. */ +static int __ATTRS_o_ai +vec_all_le(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __a, (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, + (vector unsigned char)__a, + (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a, __b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ, + (vector unsigned char)__a, + (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_le(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_le(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a, (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, + (vector unsigned short)__a, + (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a, __b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, + (vector unsigned short)__a, + (vector unsigned short)__b); 
+} +/* vec_all_le (continued): word overloads pass (__CR6_EQ, __a, __b) to vcmpgtsw_p / vcmpgtuw_p; the float overload instead calls vcmpgefp_p(__CR6_LT, __b, __a) with the operands swapped. */ +static int __ATTRS_o_ai +vec_all_le(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_le(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_le(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __a, (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, + (vector unsigned int)__a, + (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a, __b); +} + +static int __ATTRS_o_ai +vec_all_le(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, + (vector unsigned int)__a, + (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_all_le(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_LT, __b, __a); +} + +/* vec_all_lt: every overload calls a greater-than predicate builtin with the operands swapped (__b, __a) and __CR6_LT; overloads whose first operand is a bool vector use the unsigned compare builtin. */ + +static int __ATTRS_o_ai +vec_all_lt(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT, (vector signed char)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b, __a); +} + +static int 
__ATTRS_o_ai +vec_all_lt(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, + (vector unsigned char)__b, + (vector unsigned char)__a); +} +/* vec_all_lt (continued): byte, halfword and first word overloads; each passes (__CR6_LT, __b, __a) to vcmpgtub_p, vcmpgtsh_p, vcmpgtuh_p or vcmpgtsw_p, casting bool operands. */ +static int __ATTRS_o_ai +vec_all_lt(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, __b, (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT, + (vector unsigned char)__b, + (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT, (vector short)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, + (vector unsigned short)__b, + (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b, (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT, + (vector unsigned short)__b, + (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector int __a, vector bool int __b) +{ + return 
__builtin_altivec_vcmpgtsw_p(__CR6_LT, (vector int)__b, __a); +} +/* vec_all_lt (continued): remaining word overloads pass (__CR6_LT, __b, __a) to vcmpgtuw_p and the float overload to vcmpgtfp_p. */ +static int __ATTRS_o_ai +vec_all_lt(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b, __a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, + (vector unsigned int)__b, + (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __b, (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT, + (vector unsigned int)__b, + (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_all_lt(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT, __b, __a); +} + +/* vec_all_nan: single float overload; compares __a with itself via __builtin_altivec_vcmpeqfp_p using __CR6_EQ. */ + +static int __attribute__((__always_inline__)) +vec_all_nan(vector float __a) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __a); +} + +/* vec_all_ne: same equality builtins and operand casts as vec_all_eq, but called with __CR6_EQ instead of __CR6_LT. */ + +static int __ATTRS_o_ai +vec_all_ne(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); +} + +static int 
__ATTRS_o_ai +vec_all_ne(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ, __a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector unsigned short __a, vector unsigned short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector unsigned short __a, vector bool short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool short __a, vector short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool short __a, vector unsigned short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool short __a, vector bool short __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector pixel __a, vector pixel __b) +{ + return + __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector int __a, vector int __b) +{ + return 
__builtin_altivec_vcmpequw_p(__CR6_EQ, __a, __b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, __a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_all_ne(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __b); +} + +/* vec_all_nge */ + +static int __attribute__((__always_inline__)) +vec_all_nge(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __a, __b); +} + +/* vec_all_ngt */ + +static int __attribute__((__always_inline__)) +vec_all_ngt(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __a, __b); +} + +/* vec_all_nle */ + +static int __attribute__((__always_inline__)) +vec_all_nle(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __b, __a); +} + +/* vec_all_nlt */ + +static int __attribute__((__always_inline__)) +vec_all_nlt(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __b, __a); +} + +/* vec_all_numeric */ + +static int 
__attribute__((__always_inline__)) +vec_all_numeric(vector float __a) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __a); +} + +/* vec_any_eq */ + +static int __ATTRS_o_ai +vec_any_eq(vector signed char __a, vector signed char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector signed char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector unsigned char __a, vector unsigned char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector unsigned char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool char __a, vector signed char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool char __a, vector unsigned char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, __a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector unsigned 
short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector pixel __a, vector pixel __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, __a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector unsigned int __a, vector unsigned int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector unsigned int __a, vector bool int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool int __a, vector int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool int __a, vector unsigned int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector bool int __a, vector bool int __b) +{ + return + 
__builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_eq(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __b); +} + +/* vec_any_ge */ + +static int __ATTRS_o_ai +vec_any_ge(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, (vector signed char)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector unsigned char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, + (vector unsigned char)__b, + (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b, (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, + (vector unsigned char)__b, + (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, (vector short)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b, __a); +} + +static int __ATTRS_o_ai 
+vec_any_ge(vector unsigned short __a, vector bool short __b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, + (vector unsigned short)__b, + (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool short __a, vector unsigned short __b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b, (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, + (vector unsigned short)__b, + (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, (vector int)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, + (vector unsigned int)__b, + (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b, (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, + (vector unsigned int)__b, + (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_any_ge(vector float __a, vector 
float __b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, __a, __b); +} + +/* vec_any_gt */ + +static int __ATTRS_o_ai +vec_any_gt(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a, (vector signed char)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector unsigned char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a, (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, + (vector unsigned char)__a, + (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool char __a, vector unsigned char __b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, + (vector unsigned char)__a, + (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector unsigned short __a, vector bool short __b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a, (vector unsigned 
short)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, + (vector unsigned short)__a, + (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool short __a, vector unsigned short __b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, + (vector unsigned short)__a, + (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a, (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, + (vector unsigned int)__a, + (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a, __b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, + (vector unsigned int)__a, + (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_any_gt(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, __a, __b); +} + +/* vec_any_le */ + +static int __ATTRS_o_ai 
+vec_any_le(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a, (vector signed char)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector unsigned char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a, (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, + (vector unsigned char)__a, + (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool char __a, vector unsigned char __b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, + (vector unsigned char)__a, + (vector unsigned char)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector unsigned short __a, vector bool short __b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a, (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool short __a, vector short __b) +{ + return 
__builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, + (vector unsigned short)__a, + (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool short __a, vector unsigned short __b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, + (vector unsigned short)__a, + (vector unsigned short)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a, (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, + (vector unsigned int)__a, + (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a, __b); +} + +static int __ATTRS_o_ai +vec_any_le(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, + (vector unsigned int)__a, + (vector unsigned int)__b); +} + +static int __ATTRS_o_ai +vec_any_le(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, __b, __a); +} + +/* vec_any_lt */ + +static int __ATTRS_o_ai +vec_any_lt(vector signed char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __b, 
__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector signed char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, (vector signed char)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector unsigned char __a, vector unsigned char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector unsigned char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool char __a, vector signed char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, + (vector unsigned char)__b, + (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool char __a, vector unsigned char __b) +{ + return + __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b, (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool char __a, vector bool char __b) +{ + return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, + (vector unsigned char)__b, + (vector unsigned char)__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, (vector short)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector unsigned short __a, vector bool short __b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool short __a, vector short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, + (vector unsigned short)__b, + (vector unsigned short)__a); +} + +static int __ATTRS_o_ai 
+vec_any_lt(vector bool short __a, vector unsigned short __b) +{ + return + __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b, (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, + (vector unsigned short)__b, + (vector unsigned short)__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, (vector int)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector unsigned int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector unsigned int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b, __a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool int __a, vector int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, + (vector unsigned int)__b, + (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool int __a, vector unsigned int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b, (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector bool int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, + (vector unsigned int)__b, + (vector unsigned int)__a); +} + +static int __ATTRS_o_ai +vec_any_lt(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, __b, __a); +} + +/* vec_any_nan */ + +static int __attribute__((__always_inline__)) +vec_any_nan(vector float __a) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __a); +} + +/* vec_any_ne */ + +static int __ATTRS_o_ai +vec_any_ne(vector signed char __a, vector signed char __b) +{ + return + 
__builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector signed char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector unsigned char __a, vector unsigned char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector unsigned char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool char __a, vector signed char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool char __a, vector unsigned char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool char __a, vector bool char __b) +{ + return + __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector short __a, vector short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, __a, (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector unsigned short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector unsigned short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool short __a, vector short __b) +{ + return 
__builtin_altivec_vcmpequh_p(__CR6_LT_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool short __a, vector unsigned short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool short __a, vector bool short __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector pixel __a, vector pixel __b) +{ + return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, + (vector short)__a, + (vector short)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector int __a, vector int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, __a, __b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector int __a, vector bool int __b) +{ + return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, __a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector unsigned int __a, vector unsigned int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector unsigned int __a, vector bool int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool int __a, vector int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool int __a, vector unsigned int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector bool int __a, vector bool int __b) +{ + return + __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b); +} + +static int __ATTRS_o_ai +vec_any_ne(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __b); +} + +/* vec_any_nge */ + 
+static int __attribute__((__always_inline__)) +vec_any_nge(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __a, __b); +} + +/* vec_any_ngt */ + +static int __attribute__((__always_inline__)) +vec_any_ngt(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __a, __b); +} + +/* vec_any_nle */ + +static int __attribute__((__always_inline__)) +vec_any_nle(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __b, __a); +} + +/* vec_any_nlt */ + +static int __attribute__((__always_inline__)) +vec_any_nlt(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __b, __a); +} + +/* vec_any_numeric */ + +static int __attribute__((__always_inline__)) +vec_any_numeric(vector float __a) +{ + return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __a); +} + +/* vec_any_out */ + +static int __attribute__((__always_inline__)) +vec_any_out(vector float __a, vector float __b) +{ + return __builtin_altivec_vcmpbfp_p(__CR6_EQ_REV, __a, __b); +} + +#undef __ATTRS_o_ai + +#endif /* __ALTIVEC_H */ diff --git a/python/clang/5.1/include/ammintrin.h b/python/clang/5.1/include/ammintrin.h new file mode 100644 index 00000000..d87b9cde --- /dev/null +++ b/python/clang/5.1/include/ammintrin.h @@ -0,0 +1,68 @@ +/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __AMMINTRIN_H +#define __AMMINTRIN_H + +#ifndef __SSE4A__ +#error "SSE4A instruction set not enabled" +#else + +#include <pmmintrin.h> + +#define _mm_extracti_si64(x, len, idx) \ + ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \ + (char)(len), (char)(idx))) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_extract_si64(__m128i __x, __m128i __y) +{ + return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y); +} + +#define _mm_inserti_si64(x, y, len, idx) \ + ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \ + (__v2di)(__m128i)(y), \ + (char)(len), (char)(idx))) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_insert_si64(__m128i __x, __m128i __y) +{ + return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_sd(double *__p, __m128d __a) +{ + __builtin_ia32_movntsd(__p, (__v2df)__a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_ss(float *__p, __m128 __a) +{ + __builtin_ia32_movntss(__p, (__v4sf)__a); +} + +#endif /* __SSE4A__ */ + +#endif /* __AMMINTRIN_H */ diff --git a/python/clang/5.1/include/arm_neon.h b/python/clang/5.1/include/arm_neon.h new file mode 100644 index 00000000..0ab3df99 --- /dev/null +++ 
b/python/clang/5.1/include/arm_neon.h @@ -0,0 +1,7342 @@ +/*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __ARM_NEON_H +#define __ARM_NEON_H + +#if defined(__arm64) +#include "aarch64_simd.h" +#else + +#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD) +#error "NEON support not enabled" +#endif + +#include <stdint.h> + +typedef float float32_t; +#ifdef __aarch64__ +typedef __fp16 float16_t; +typedef double float64_t; +#else +typedef uint16_t float16_t; +#endif + +#ifdef __aarch64__ +typedef uint8_t poly8_t; +typedef uint16_t poly16_t; +#else +typedef int8_t poly8_t; +typedef int16_t poly16_t; +#endif +typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; +typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; +typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; +typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; +typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t; +typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; +typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t; +typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; +typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; +typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; +typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; +typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; +typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; +typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; +typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; +typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; +typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; +typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; +typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; +typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; +#ifdef __aarch64__ +typedef __attribute__((neon_vector_type(1))) 
float64_t float64x1_t; +typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t; +#endif +typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; +typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; +typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t; +typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; + +typedef struct int8x8x2_t { + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t { + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t { + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t { + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t { + int32x2_t val[2]; +} int32x2x2_t; + +typedef struct int32x4x2_t { + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t { + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t { + int64x2_t val[2]; +} int64x2x2_t; + +typedef struct uint8x8x2_t { + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t { + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t { + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t { + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t { + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct uint32x4x2_t { + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t { + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t { + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float16x4x2_t { + float16x4_t val[2]; +} float16x4x2_t; + +typedef struct float16x8x2_t { + float16x8_t val[2]; +} float16x8x2_t; + +typedef struct float32x2x2_t { + float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t { + float32x4_t val[2]; +} float32x4x2_t; + +#ifdef __aarch64__ +typedef struct float64x1x2_t { + float64x1_t val[2]; +} float64x1x2_t; + +typedef struct float64x2x2_t { + float64x2_t val[2]; +} float64x2x2_t; + +#endif +typedef struct poly8x8x2_t { + 
poly8x8_t val[2]; +} poly8x8x2_t; + +typedef struct poly8x16x2_t { + poly8x16_t val[2]; +} poly8x16x2_t; + +typedef struct poly16x4x2_t { + poly16x4_t val[2]; +} poly16x4x2_t; + +typedef struct poly16x8x2_t { + poly16x8_t val[2]; +} poly16x8x2_t; + +typedef struct int8x8x3_t { + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t { + int8x16_t val[3]; +} int8x16x3_t; + +typedef struct int16x4x3_t { + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t { + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t { + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t { + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t { + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t { + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t { + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t { + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t { + uint16x4_t val[3]; +} uint16x4x3_t; + +typedef struct uint16x8x3_t { + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t { + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t { + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t { + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t { + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float16x4x3_t { + float16x4_t val[3]; +} float16x4x3_t; + +typedef struct float16x8x3_t { + float16x8_t val[3]; +} float16x8x3_t; + +typedef struct float32x2x3_t { + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t { + float32x4_t val[3]; +} float32x4x3_t; + +#ifdef __aarch64__ +typedef struct float64x1x3_t { + float64x1_t val[3]; +} float64x1x3_t; + +typedef struct float64x2x3_t { + float64x2_t val[3]; +} float64x2x3_t; + +#endif +typedef struct poly8x8x3_t { + poly8x8_t val[3]; +} poly8x8x3_t; + +typedef struct poly8x16x3_t { + poly8x16_t val[3]; +} poly8x16x3_t; + +typedef struct poly16x4x3_t { + 
poly16x4_t val[3]; +} poly16x4x3_t; + +typedef struct poly16x8x3_t { + poly16x8_t val[3]; +} poly16x8x3_t; + +typedef struct int8x8x4_t { + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t { + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t { + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t { + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t { + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t { + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t { + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t { + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t { + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t { + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t { + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t { + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t { + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t { + uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t { + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef struct uint64x2x4_t { + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float16x4x4_t { + float16x4_t val[4]; +} float16x4x4_t; + +typedef struct float16x8x4_t { + float16x8_t val[4]; +} float16x8x4_t; + +typedef struct float32x2x4_t { + float32x2_t val[4]; +} float32x2x4_t; + +typedef struct float32x4x4_t { + float32x4_t val[4]; +} float32x4x4_t; + +#ifdef __aarch64__ +typedef struct float64x1x4_t { + float64x1_t val[4]; +} float64x1x4_t; + +typedef struct float64x2x4_t { + float64x2_t val[4]; +} float64x2x4_t; + +#endif +typedef struct poly8x8x4_t { + poly8x8_t val[4]; +} poly8x8x4_t; + +typedef struct poly8x16x4_t { + poly8x16_t val[4]; +} poly8x16x4_t; + +typedef struct poly16x4x4_t { + poly16x4_t val[4]; +} poly16x4x4_t; + +typedef struct poly16x8x4_t { + poly16x8_t val[4]; +} poly16x8x4_t; + +#define __ai static inline 
__attribute__((__always_inline__, __nodebug__)) + +__ai int16x8_t vmovl_s8(int8x8_t __a) { + return (int16x8_t)__builtin_neon_vmovl_v(__a, 33); } +__ai int32x4_t vmovl_s16(int16x4_t __a) { + return (int32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 34); } +__ai int64x2_t vmovl_s32(int32x2_t __a) { + return (int64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 35); } +__ai uint16x8_t vmovl_u8(uint8x8_t __a) { + return (uint16x8_t)__builtin_neon_vmovl_v((int8x8_t)__a, 49); } +__ai uint32x4_t vmovl_u16(uint16x4_t __a) { + return (uint32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 50); } +__ai uint64x2_t vmovl_u32(uint32x2_t __a) { + return (uint64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 51); } + +__ai int16x8_t vmull_s8(int8x8_t __a, int8x8_t __b) { + return (int16x8_t)__builtin_neon_vmull_v(__a, __b, 33); } +__ai int32x4_t vmull_s16(int16x4_t __a, int16x4_t __b) { + return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 34); } +__ai int64x2_t vmull_s32(int32x2_t __a, int32x2_t __b) { + return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 35); } +__ai uint16x8_t vmull_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 49); } +__ai uint32x4_t vmull_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 50); } +__ai uint64x2_t vmull_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 51); } +__ai poly16x8_t vmull_p8(poly8x8_t __a, poly8x8_t __b) { + return (poly16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 37); } + +__ai int8x8_t vabd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vabd_v(__a, __b, 0); } +__ai int16x4_t vabd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vabd_s32(int32x2_t __a, int32x2_t __b) { + return 
(int32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vabd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vabd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vabd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vabd_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vabdq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vabdq_v(__a, __b, 32); } +__ai int16x8_t vabdq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vabdq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vabdq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vabdq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vabdq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vabdq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 39); } + +__ai int16x8_t vabdl_s8(int8x8_t __a, int8x8_t __b) { + return (int16x8_t)vmovl_u8((uint8x8_t)vabd_s8(__a, __b)); } +__ai int32x4_t vabdl_s16(int16x4_t __a, int16x4_t __b) { + return (int32x4_t)vmovl_u16((uint16x4_t)vabd_s16(__a, __b)); } +__ai int64x2_t vabdl_s32(int32x2_t __a, int32x2_t __b) { + return (int64x2_t)vmovl_u32((uint32x2_t)vabd_s32(__a, __b)); } +__ai uint16x8_t 
vabdl_u8(uint8x8_t __a, uint8x8_t __b) { + return vmovl_u8(vabd_u8(__a, __b)); } +__ai uint32x4_t vabdl_u16(uint16x4_t __a, uint16x4_t __b) { + return vmovl_u16(vabd_u16(__a, __b)); } +__ai uint64x2_t vabdl_u32(uint32x2_t __a, uint32x2_t __b) { + return vmovl_u32(vabd_u32(__a, __b)); } + +__ai int8x8_t vaba_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a + vabd_s8(__b, __c); } +__ai int16x4_t vaba_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a + vabd_s16(__b, __c); } +__ai int32x2_t vaba_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a + vabd_s32(__b, __c); } +__ai uint8x8_t vaba_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a + vabd_u8(__b, __c); } +__ai uint16x4_t vaba_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a + vabd_u16(__b, __c); } +__ai uint32x2_t vaba_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a + vabd_u32(__b, __c); } +__ai int8x16_t vabaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { + return __a + vabdq_s8(__b, __c); } +__ai int16x8_t vabaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { + return __a + vabdq_s16(__b, __c); } +__ai int32x4_t vabaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { + return __a + vabdq_s32(__b, __c); } +__ai uint8x16_t vabaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return __a + vabdq_u8(__b, __c); } +__ai uint16x8_t vabaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return __a + vabdq_u16(__b, __c); } +__ai uint32x4_t vabaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return __a + vabdq_u32(__b, __c); } + +__ai int16x8_t vabal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a + vabdl_s8(__b, __c); } +__ai int32x4_t vabal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a + vabdl_s16(__b, __c); } +__ai int64x2_t vabal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a + vabdl_s32(__b, __c); } +__ai uint16x8_t vabal_u8(uint16x8_t __a, uint8x8_t 
__b, uint8x8_t __c) { + return __a + vabdl_u8(__b, __c); } +__ai uint32x4_t vabal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a + vabdl_u16(__b, __c); } +__ai uint64x2_t vabal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a + vabdl_u32(__b, __c); } + + +__ai int8x8_t vabs_s8(int8x8_t __a) { + return (int8x8_t)__builtin_neon_vabs_v(__a, 0); } +__ai int16x4_t vabs_s16(int16x4_t __a) { + return (int16x4_t)__builtin_neon_vabs_v((int8x8_t)__a, 1); } +__ai int32x2_t vabs_s32(int32x2_t __a) { + return (int32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 2); } +__ai float32x2_t vabs_f32(float32x2_t __a) { + return (float32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 7); } +__ai int8x16_t vabsq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_neon_vabsq_v(__a, 32); } +__ai int16x8_t vabsq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_neon_vabsq_v((int8x16_t)__a, 33); } +__ai int32x4_t vabsq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 34); } +__ai float32x4_t vabsq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 39); } + +__ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) { + return __a + __b; } +__ai int16x4_t vadd_s16(int16x4_t __a, int16x4_t __b) { + return __a + __b; } +__ai int32x2_t vadd_s32(int32x2_t __a, int32x2_t __b) { + return __a + __b; } +__ai int64x1_t vadd_s64(int64x1_t __a, int64x1_t __b) { + return __a + __b; } +__ai float32x2_t vadd_f32(float32x2_t __a, float32x2_t __b) { + return __a + __b; } +__ai uint8x8_t vadd_u8(uint8x8_t __a, uint8x8_t __b) { + return __a + __b; } +__ai uint16x4_t vadd_u16(uint16x4_t __a, uint16x4_t __b) { + return __a + __b; } +__ai uint32x2_t vadd_u32(uint32x2_t __a, uint32x2_t __b) { + return __a + __b; } +__ai uint64x1_t vadd_u64(uint64x1_t __a, uint64x1_t __b) { + return __a + __b; } +__ai int8x16_t vaddq_s8(int8x16_t __a, int8x16_t __b) { + return __a + __b; } +__ai int16x8_t vaddq_s16(int16x8_t __a, int16x8_t __b) { + 
return __a + __b; } +__ai int32x4_t vaddq_s32(int32x4_t __a, int32x4_t __b) { + return __a + __b; } +__ai int64x2_t vaddq_s64(int64x2_t __a, int64x2_t __b) { + return __a + __b; } +__ai float32x4_t vaddq_f32(float32x4_t __a, float32x4_t __b) { + return __a + __b; } +__ai uint8x16_t vaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a + __b; } +__ai uint16x8_t vaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a + __b; } +__ai uint32x4_t vaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a + __b; } +__ai uint64x2_t vaddq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a + __b; } + +__ai int8x8_t vaddhn_s16(int16x8_t __a, int16x8_t __b) { + return (int8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } +__ai int16x4_t vaddhn_s32(int32x4_t __a, int32x4_t __b) { + return (int16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } +__ai int32x2_t vaddhn_s64(int64x2_t __a, int64x2_t __b) { + return (int32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } +__ai uint8x8_t vaddhn_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } +__ai uint16x4_t vaddhn_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } +__ai uint32x2_t vaddhn_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } + +__ai int16x8_t vaddl_s8(int8x8_t __a, int8x8_t __b) { + return vmovl_s8(__a) + vmovl_s8(__b); } +__ai int32x4_t vaddl_s16(int16x4_t __a, int16x4_t __b) { + return vmovl_s16(__a) + vmovl_s16(__b); } +__ai int64x2_t vaddl_s32(int32x2_t __a, int32x2_t __b) { + return vmovl_s32(__a) + vmovl_s32(__b); } +__ai uint16x8_t vaddl_u8(uint8x8_t __a, uint8x8_t __b) { + return vmovl_u8(__a) + vmovl_u8(__b); } +__ai uint32x4_t vaddl_u16(uint16x4_t __a, uint16x4_t __b) { + return vmovl_u16(__a) + vmovl_u16(__b); } +__ai uint64x2_t 
vaddl_u32(uint32x2_t __a, uint32x2_t __b) { + return vmovl_u32(__a) + vmovl_u32(__b); } + +__ai int16x8_t vaddw_s8(int16x8_t __a, int8x8_t __b) { + return __a + vmovl_s8(__b); } +__ai int32x4_t vaddw_s16(int32x4_t __a, int16x4_t __b) { + return __a + vmovl_s16(__b); } +__ai int64x2_t vaddw_s32(int64x2_t __a, int32x2_t __b) { + return __a + vmovl_s32(__b); } +__ai uint16x8_t vaddw_u8(uint16x8_t __a, uint8x8_t __b) { + return __a + vmovl_u8(__b); } +__ai uint32x4_t vaddw_u16(uint32x4_t __a, uint16x4_t __b) { + return __a + vmovl_u16(__b); } +__ai uint64x2_t vaddw_u32(uint64x2_t __a, uint32x2_t __b) { + return __a + vmovl_u32(__b); } + +__ai int8x8_t vand_s8(int8x8_t __a, int8x8_t __b) { + return __a & __b; } +__ai int16x4_t vand_s16(int16x4_t __a, int16x4_t __b) { + return __a & __b; } +__ai int32x2_t vand_s32(int32x2_t __a, int32x2_t __b) { + return __a & __b; } +__ai int64x1_t vand_s64(int64x1_t __a, int64x1_t __b) { + return __a & __b; } +__ai uint8x8_t vand_u8(uint8x8_t __a, uint8x8_t __b) { + return __a & __b; } +__ai uint16x4_t vand_u16(uint16x4_t __a, uint16x4_t __b) { + return __a & __b; } +__ai uint32x2_t vand_u32(uint32x2_t __a, uint32x2_t __b) { + return __a & __b; } +__ai uint64x1_t vand_u64(uint64x1_t __a, uint64x1_t __b) { + return __a & __b; } +__ai int8x16_t vandq_s8(int8x16_t __a, int8x16_t __b) { + return __a & __b; } +__ai int16x8_t vandq_s16(int16x8_t __a, int16x8_t __b) { + return __a & __b; } +__ai int32x4_t vandq_s32(int32x4_t __a, int32x4_t __b) { + return __a & __b; } +__ai int64x2_t vandq_s64(int64x2_t __a, int64x2_t __b) { + return __a & __b; } +__ai uint8x16_t vandq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a & __b; } +__ai uint16x8_t vandq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a & __b; } +__ai uint32x4_t vandq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a & __b; } +__ai uint64x2_t vandq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a & __b; } + +__ai int8x8_t vbic_s8(int8x8_t __a, int8x8_t __b) { + return __a & 
~__b; } +__ai int16x4_t vbic_s16(int16x4_t __a, int16x4_t __b) { + return __a & ~__b; } +__ai int32x2_t vbic_s32(int32x2_t __a, int32x2_t __b) { + return __a & ~__b; } +__ai int64x1_t vbic_s64(int64x1_t __a, int64x1_t __b) { + return __a & ~__b; } +__ai uint8x8_t vbic_u8(uint8x8_t __a, uint8x8_t __b) { + return __a & ~__b; } +__ai uint16x4_t vbic_u16(uint16x4_t __a, uint16x4_t __b) { + return __a & ~__b; } +__ai uint32x2_t vbic_u32(uint32x2_t __a, uint32x2_t __b) { + return __a & ~__b; } +__ai uint64x1_t vbic_u64(uint64x1_t __a, uint64x1_t __b) { + return __a & ~__b; } +__ai int8x16_t vbicq_s8(int8x16_t __a, int8x16_t __b) { + return __a & ~__b; } +__ai int16x8_t vbicq_s16(int16x8_t __a, int16x8_t __b) { + return __a & ~__b; } +__ai int32x4_t vbicq_s32(int32x4_t __a, int32x4_t __b) { + return __a & ~__b; } +__ai int64x2_t vbicq_s64(int64x2_t __a, int64x2_t __b) { + return __a & ~__b; } +__ai uint8x16_t vbicq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a & ~__b; } +__ai uint16x8_t vbicq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a & ~__b; } +__ai uint32x4_t vbicq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a & ~__b; } +__ai uint64x2_t vbicq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a & ~__b; } + +__ai int8x8_t vbsl_s8(uint8x8_t __a, int8x8_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, __b, __c, 0); } +__ai int16x4_t vbsl_s16(uint16x4_t __a, int16x4_t __b, int16x4_t __c) { + return (int16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 1); } +__ai int32x2_t vbsl_s32(uint32x2_t __a, int32x2_t __b, int32x2_t __c) { + return (int32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 2); } +__ai int64x1_t vbsl_s64(uint64x1_t __a, int64x1_t __b, int64x1_t __c) { + return (int64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 3); } +__ai uint8x8_t vbsl_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return 
(uint8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 16); } +__ai uint16x4_t vbsl_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return (uint16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 17); } +__ai uint32x2_t vbsl_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return (uint32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 18); } +__ai uint64x1_t vbsl_u64(uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) { + return (uint64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 19); } +__ai float32x2_t vbsl_f32(uint32x2_t __a, float32x2_t __b, float32x2_t __c) { + return (float32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 7); } +__ai poly8x8_t vbsl_p8(uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) { + return (poly8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 4); } +__ai poly16x4_t vbsl_p16(uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) { + return (poly16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 5); } +__ai int8x16_t vbslq_s8(uint8x16_t __a, int8x16_t __b, int8x16_t __c) { + return (int8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, __b, __c, 32); } +__ai int16x8_t vbslq_s16(uint16x8_t __a, int16x8_t __b, int16x8_t __c) { + return (int16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 33); } +__ai int32x4_t vbslq_s32(uint32x4_t __a, int32x4_t __b, int32x4_t __c) { + return (int32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); } +__ai int64x2_t vbslq_s64(uint64x2_t __a, int64x2_t __b, int64x2_t __c) { + return (int64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 35); } +__ai uint8x16_t vbslq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return (uint8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); } +__ai uint16x8_t vbslq_u16(uint16x8_t __a, uint16x8_t 
__b, uint16x8_t __c) { + return (uint16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); } +__ai uint32x4_t vbslq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } +__ai uint64x2_t vbslq_u64(uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return (uint64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 51); } +__ai float32x4_t vbslq_f32(uint32x4_t __a, float32x4_t __b, float32x4_t __c) { + return (float32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 39); } +__ai poly8x16_t vbslq_p8(uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) { + return (poly8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 36); } +__ai poly16x8_t vbslq_p16(uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) { + return (poly16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 37); } + +__ai uint32x2_t vcage_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)__builtin_neon_vcage_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint32x4_t vcageq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)__builtin_neon_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai uint32x2_t vcagt_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)__builtin_neon_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint32x4_t vcagtq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai uint32x2_t vcale_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)__builtin_neon_vcale_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint32x4_t vcaleq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai uint32x2_t vcalt_f32(float32x2_t __a, float32x2_t __b) { + return 
(uint32x2_t)__builtin_neon_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint32x4_t vcaltq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai uint8x8_t vceq_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a == __b); } +__ai uint16x4_t vceq_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a == __b); } +__ai uint32x2_t vceq_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a == __b); } +__ai uint32x2_t vceq_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a == __b); } +__ai uint8x8_t vceq_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a == __b); } +__ai uint16x4_t vceq_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a == __b); } +__ai uint32x2_t vceq_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a == __b); } +__ai uint8x8_t vceq_p8(poly8x8_t __a, poly8x8_t __b) { + return (uint8x8_t)(__a == __b); } +__ai uint8x16_t vceqq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a == __b); } +__ai uint16x8_t vceqq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a == __b); } +__ai uint32x4_t vceqq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a == __b); } +__ai uint32x4_t vceqq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a == __b); } +__ai uint8x16_t vceqq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a == __b); } +__ai uint16x8_t vceqq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a == __b); } +__ai uint32x4_t vceqq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a == __b); } +__ai uint8x16_t vceqq_p8(poly8x16_t __a, poly8x16_t __b) { + return (uint8x16_t)(__a == __b); } + +__ai uint8x8_t vcge_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a >= __b); } +__ai uint16x4_t vcge_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a >= __b); } +__ai uint32x2_t vcge_s32(int32x2_t __a, int32x2_t __b) 
{ + return (uint32x2_t)(__a >= __b); } +__ai uint32x2_t vcge_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a >= __b); } +__ai uint8x8_t vcge_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a >= __b); } +__ai uint16x4_t vcge_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a >= __b); } +__ai uint32x2_t vcge_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a >= __b); } +__ai uint8x16_t vcgeq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a >= __b); } +__ai uint16x8_t vcgeq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a >= __b); } +__ai uint32x4_t vcgeq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a >= __b); } +__ai uint32x4_t vcgeq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a >= __b); } +__ai uint8x16_t vcgeq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a >= __b); } +__ai uint16x8_t vcgeq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a >= __b); } +__ai uint32x4_t vcgeq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a >= __b); } + +__ai uint8x8_t vcgt_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a > __b); } +__ai uint16x4_t vcgt_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a > __b); } +__ai uint32x2_t vcgt_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a > __b); } +__ai uint32x2_t vcgt_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a > __b); } +__ai uint8x8_t vcgt_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a > __b); } +__ai uint16x4_t vcgt_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a > __b); } +__ai uint32x2_t vcgt_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a > __b); } +__ai uint8x16_t vcgtq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a > __b); } +__ai uint16x8_t vcgtq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a > __b); } +__ai uint32x4_t vcgtq_s32(int32x4_t __a, 
int32x4_t __b) { + return (uint32x4_t)(__a > __b); } +__ai uint32x4_t vcgtq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a > __b); } +__ai uint8x16_t vcgtq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a > __b); } +__ai uint16x8_t vcgtq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a > __b); } +__ai uint32x4_t vcgtq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a > __b); } + +__ai uint8x8_t vcle_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a <= __b); } +__ai uint16x4_t vcle_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a <= __b); } +__ai uint32x2_t vcle_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a <= __b); } +__ai uint32x2_t vcle_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a <= __b); } +__ai uint8x8_t vcle_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a <= __b); } +__ai uint16x4_t vcle_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a <= __b); } +__ai uint32x2_t vcle_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a <= __b); } +__ai uint8x16_t vcleq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a <= __b); } +__ai uint16x8_t vcleq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a <= __b); } +__ai uint32x4_t vcleq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a <= __b); } +__ai uint32x4_t vcleq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a <= __b); } +__ai uint8x16_t vcleq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a <= __b); } +__ai uint16x8_t vcleq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a <= __b); } +__ai uint32x4_t vcleq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a <= __b); } + +__ai int8x8_t vcls_s8(int8x8_t __a) { + return (int8x8_t)__builtin_neon_vcls_v(__a, 0); } +__ai int16x4_t vcls_s16(int16x4_t __a) { + return (int16x4_t)__builtin_neon_vcls_v((int8x8_t)__a, 1); } +__ai 
int32x2_t vcls_s32(int32x2_t __a) { + return (int32x2_t)__builtin_neon_vcls_v((int8x8_t)__a, 2); } +__ai int8x16_t vclsq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_neon_vclsq_v(__a, 32); } +__ai int16x8_t vclsq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_neon_vclsq_v((int8x16_t)__a, 33); } +__ai int32x4_t vclsq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_neon_vclsq_v((int8x16_t)__a, 34); } + +__ai uint8x8_t vclt_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)(__a < __b); } +__ai uint16x4_t vclt_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)(__a < __b); } +__ai uint32x2_t vclt_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)(__a < __b); } +__ai uint32x2_t vclt_f32(float32x2_t __a, float32x2_t __b) { + return (uint32x2_t)(__a < __b); } +__ai uint8x8_t vclt_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)(__a < __b); } +__ai uint16x4_t vclt_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)(__a < __b); } +__ai uint32x2_t vclt_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)(__a < __b); } +__ai uint8x16_t vcltq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)(__a < __b); } +__ai uint16x8_t vcltq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)(__a < __b); } +__ai uint32x4_t vcltq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)(__a < __b); } +__ai uint32x4_t vcltq_f32(float32x4_t __a, float32x4_t __b) { + return (uint32x4_t)(__a < __b); } +__ai uint8x16_t vcltq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)(__a < __b); } +__ai uint16x8_t vcltq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)(__a < __b); } +__ai uint32x4_t vcltq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)(__a < __b); } + +__ai int8x8_t vclz_s8(int8x8_t __a) { + return (int8x8_t)__builtin_neon_vclz_v(__a, 0); } +__ai int16x4_t vclz_s16(int16x4_t __a) { + return (int16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 1); } +__ai int32x2_t vclz_s32(int32x2_t __a) { + return 
(int32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 2); } +__ai uint8x8_t vclz_u8(uint8x8_t __a) { + return (uint8x8_t)__builtin_neon_vclz_v((int8x8_t)__a, 16); } +__ai uint16x4_t vclz_u16(uint16x4_t __a) { + return (uint16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 17); } +__ai uint32x2_t vclz_u32(uint32x2_t __a) { + return (uint32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 18); } +__ai int8x16_t vclzq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_neon_vclzq_v(__a, 32); } +__ai int16x8_t vclzq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 33); } +__ai int32x4_t vclzq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 34); } +__ai uint8x16_t vclzq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_neon_vclzq_v((int8x16_t)__a, 48); } +__ai uint16x8_t vclzq_u16(uint16x8_t __a) { + return (uint16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 49); } +__ai uint32x4_t vclzq_u32(uint32x4_t __a) { + return (uint32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 50); } + +__ai uint8x8_t vcnt_u8(uint8x8_t __a) { + return (uint8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 16); } +__ai int8x8_t vcnt_s8(int8x8_t __a) { + return (int8x8_t)__builtin_neon_vcnt_v(__a, 0); } +__ai poly8x8_t vcnt_p8(poly8x8_t __a) { + return (poly8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 4); } +__ai uint8x16_t vcntq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 48); } +__ai int8x16_t vcntq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_neon_vcntq_v(__a, 32); } +__ai poly8x16_t vcntq_p8(poly8x16_t __a) { + return (poly8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 36); } + +__ai int8x16_t vcombine_s8(int8x8_t __a, int8x8_t __b) { + return (int8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai int16x8_t vcombine_s16(int16x4_t __a, int16x4_t __b) { + return (int16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai int32x4_t vcombine_s32(int32x2_t __a, int32x2_t __b) { 
+ return (int32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai int64x2_t vcombine_s64(int64x1_t __a, int64x1_t __b) { + return (int64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai float16x8_t vcombine_f16(float16x4_t __a, float16x4_t __b) { + return (float16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai float32x4_t vcombine_f32(float32x2_t __a, float32x2_t __b) { + return (float32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai uint8x16_t vcombine_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai uint16x8_t vcombine_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai uint32x4_t vcombine_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai uint64x2_t vcombine_u64(uint64x1_t __a, uint64x1_t __b) { + return (uint64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai poly8x16_t vcombine_p8(poly8x8_t __a, poly8x8_t __b) { + return (poly8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } +__ai poly16x8_t vcombine_p16(poly16x4_t __a, poly16x4_t __b) { + return (poly16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } + +__ai int8x8_t vcreate_s8(uint64_t __a) { + return (int8x8_t)__a; } +__ai int16x4_t vcreate_s16(uint64_t __a) { + return (int16x4_t)__a; } +__ai int32x2_t vcreate_s32(uint64_t __a) { + return (int32x2_t)__a; } +__ai float16x4_t vcreate_f16(uint64_t __a) { + return (float16x4_t)__a; } +__ai float32x2_t vcreate_f32(uint64_t __a) { + return (float32x2_t)__a; } +__ai uint8x8_t vcreate_u8(uint64_t __a) { + return (uint8x8_t)__a; } +__ai uint16x4_t vcreate_u16(uint64_t __a) { + return (uint16x4_t)__a; } +__ai uint32x2_t vcreate_u32(uint64_t __a) { + return 
(uint32x2_t)__a; } +__ai uint64x1_t vcreate_u64(uint64_t __a) { + return (uint64x1_t)__a; } +__ai poly8x8_t vcreate_p8(uint64_t __a) { + return (poly8x8_t)__a; } +__ai poly16x4_t vcreate_p16(uint64_t __a) { + return (poly16x4_t)__a; } +__ai int64x1_t vcreate_s64(uint64_t __a) { + return (int64x1_t)__a; } + +__ai float16x4_t vcvt_f16_f32(float32x4_t __a) { + return (float16x4_t)__builtin_neon_vcvt_f16_v((int8x16_t)__a, 6); } + +__ai float32x2_t vcvt_f32_s32(int32x2_t __a) { + return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 2); } +__ai float32x2_t vcvt_f32_u32(uint32x2_t __a) { + return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 18); } +__ai float32x4_t vcvtq_f32_s32(int32x4_t __a) { + return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 34); } +__ai float32x4_t vcvtq_f32_u32(uint32x4_t __a) { + return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 50); } + +__ai float32x4_t vcvt_f32_f16(float16x4_t __a) { + return (float32x4_t)__builtin_neon_vcvt_f32_f16((int8x8_t)__a, 6); } + +#define vcvt_n_f32_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 2); }) +#define vcvt_n_f32_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 18); }) +#define vcvtq_n_f32_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 34); }) +#define vcvtq_n_f32_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 50); }) + +#define vcvt_n_s32_f32(a, __b) __extension__ ({ \ + float32x2_t __a = (a); \ + (int32x2_t)__builtin_neon_vcvt_n_s32_v((int8x8_t)__a, __b, 2); }) +#define vcvtq_n_s32_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + (int32x4_t)__builtin_neon_vcvtq_n_s32_v((int8x16_t)__a, __b, 34); }) + +#define vcvt_n_u32_f32(a, __b) __extension__ ({ \ + float32x2_t __a = 
(a); \ + (uint32x2_t)__builtin_neon_vcvt_n_u32_v((int8x8_t)__a, __b, 18); }) +#define vcvtq_n_u32_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + (uint32x4_t)__builtin_neon_vcvtq_n_u32_v((int8x16_t)__a, __b, 50); }) + +__ai int32x2_t vcvt_s32_f32(float32x2_t __a) { + return (int32x2_t)__builtin_neon_vcvt_s32_v((int8x8_t)__a, 2); } +__ai int32x4_t vcvtq_s32_f32(float32x4_t __a) { + return (int32x4_t)__builtin_neon_vcvtq_s32_v((int8x16_t)__a, 34); } + +__ai uint32x2_t vcvt_u32_f32(float32x2_t __a) { + return (uint32x2_t)__builtin_neon_vcvt_u32_v((int8x8_t)__a, 18); } +__ai uint32x4_t vcvtq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__builtin_neon_vcvtq_u32_v((int8x16_t)__a, 50); } + +#define vdup_lane_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_lane_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_lane_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdup_lane_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_lane_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_lane_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdup_lane_p8(a, __b) __extension__ ({ \ + poly8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_lane_p16(a, __b) __extension__ ({ \ + poly16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_lane_f32(a, __b) __extension__ ({ \ + float32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdupq_lane_u8(a, __b) __extension__ 
({ \ + uint8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdupq_lane_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdupq_lane_p8(a, __b) __extension__ ({ \ + poly8x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_p16(a, __b) __extension__ ({ \ + poly16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_f32(a, __b) __extension__ ({ \ + float32x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_lane_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b); }) +#define vdup_lane_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b); }) +#define vdupq_lane_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdupq_lane_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) + +__ai uint8x8_t vdup_n_u8(uint8_t __a) { + return (uint8x8_t){ __a, __a, __a, __a, 
__a, __a, __a, __a }; } +__ai uint16x4_t vdup_n_u16(uint16_t __a) { + return (uint16x4_t){ __a, __a, __a, __a }; } +__ai uint32x2_t vdup_n_u32(uint32_t __a) { + return (uint32x2_t){ __a, __a }; } +__ai int8x8_t vdup_n_s8(int8_t __a) { + return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int16x4_t vdup_n_s16(int16_t __a) { + return (int16x4_t){ __a, __a, __a, __a }; } +__ai int32x2_t vdup_n_s32(int32_t __a) { + return (int32x2_t){ __a, __a }; } +__ai poly8x8_t vdup_n_p8(poly8_t __a) { + return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly16x4_t vdup_n_p16(poly16_t __a) { + return (poly16x4_t){ __a, __a, __a, __a }; } +__ai float32x2_t vdup_n_f32(float32_t __a) { + return (float32x2_t){ __a, __a }; } +__ai uint8x16_t vdupq_n_u8(uint8_t __a) { + return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint16x8_t vdupq_n_u16(uint16_t __a) { + return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint32x4_t vdupq_n_u32(uint32_t __a) { + return (uint32x4_t){ __a, __a, __a, __a }; } +__ai int8x16_t vdupq_n_s8(int8_t __a) { + return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int16x8_t vdupq_n_s16(int16_t __a) { + return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int32x4_t vdupq_n_s32(int32_t __a) { + return (int32x4_t){ __a, __a, __a, __a }; } +__ai poly8x16_t vdupq_n_p8(poly8_t __a) { + return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly16x8_t vdupq_n_p16(poly16_t __a) { + return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai float32x4_t vdupq_n_f32(float32_t __a) { + return (float32x4_t){ __a, __a, __a, __a }; } +__ai int64x1_t vdup_n_s64(int64_t __a) { + return (int64x1_t){ __a }; } +__ai uint64x1_t vdup_n_u64(uint64_t __a) { + return (uint64x1_t){ __a }; } +__ai int64x2_t vdupq_n_s64(int64_t __a) { + 
return (int64x2_t){ __a, __a }; } +__ai uint64x2_t vdupq_n_u64(uint64_t __a) { + return (uint64x2_t){ __a, __a }; } + +__ai int8x8_t veor_s8(int8x8_t __a, int8x8_t __b) { + return __a ^ __b; } +__ai int16x4_t veor_s16(int16x4_t __a, int16x4_t __b) { + return __a ^ __b; } +__ai int32x2_t veor_s32(int32x2_t __a, int32x2_t __b) { + return __a ^ __b; } +__ai int64x1_t veor_s64(int64x1_t __a, int64x1_t __b) { + return __a ^ __b; } +__ai uint8x8_t veor_u8(uint8x8_t __a, uint8x8_t __b) { + return __a ^ __b; } +__ai uint16x4_t veor_u16(uint16x4_t __a, uint16x4_t __b) { + return __a ^ __b; } +__ai uint32x2_t veor_u32(uint32x2_t __a, uint32x2_t __b) { + return __a ^ __b; } +__ai uint64x1_t veor_u64(uint64x1_t __a, uint64x1_t __b) { + return __a ^ __b; } +__ai int8x16_t veorq_s8(int8x16_t __a, int8x16_t __b) { + return __a ^ __b; } +__ai int16x8_t veorq_s16(int16x8_t __a, int16x8_t __b) { + return __a ^ __b; } +__ai int32x4_t veorq_s32(int32x4_t __a, int32x4_t __b) { + return __a ^ __b; } +__ai int64x2_t veorq_s64(int64x2_t __a, int64x2_t __b) { + return __a ^ __b; } +__ai uint8x16_t veorq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a ^ __b; } +__ai uint16x8_t veorq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a ^ __b; } +__ai uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a ^ __b; } +__ai uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a ^ __b; } + +#define vext_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_neon_vext_v(__a, __b, __c, 0); }) +#define vext_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vext_p8(a, b, __c) __extension__ ({ \ + poly8x8_t __a = (a); poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) +#define vext_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + 
(int16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vext_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vext_p16(a, b, __c) __extension__ ({ \ + poly16x4_t __a = (a); poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) +#define vext_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vext_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vext_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vext_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vext_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); \ + (float32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 7); }) +#define vextq_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_neon_vextq_v(__a, __b, __c, 32); }) +#define vextq_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vextq_p8(a, b, __c) __extension__ ({ \ + poly8x16_t __a = (a); poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) +#define vextq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vextq_u16(a, 
b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vextq_p16(a, b, __c) __extension__ ({ \ + poly16x8_t __a = (a); poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) +#define vextq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vextq_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vextq_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vextq_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) +#define vextq_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); \ + (float32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 39); }) + +__ai float32x2_t vfma_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { + return (float32x2_t)__builtin_neon_vfma_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 7); } +__ai float32x4_t vfmaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { + return (float32x4_t)__builtin_neon_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 39); } + +__ai int8x8_t vget_high_s8(int8x16_t __a) { + return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); } +__ai int16x4_t vget_high_s16(int16x8_t __a) { + return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); } +__ai int32x2_t vget_high_s32(int32x4_t __a) { + return __builtin_shufflevector(__a, __a, 2, 3); } +__ai int64x1_t vget_high_s64(int64x2_t __a) { + return __builtin_shufflevector(__a, 
__a, 1); } +__ai float16x4_t vget_high_f16(float16x8_t __a) { + return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); } +__ai float32x2_t vget_high_f32(float32x4_t __a) { + return __builtin_shufflevector(__a, __a, 2, 3); } +__ai uint8x8_t vget_high_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); } +__ai uint16x4_t vget_high_u16(uint16x8_t __a) { + return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); } +__ai uint32x2_t vget_high_u32(uint32x4_t __a) { + return __builtin_shufflevector(__a, __a, 2, 3); } +__ai uint64x1_t vget_high_u64(uint64x2_t __a) { + return __builtin_shufflevector(__a, __a, 1); } +__ai poly8x8_t vget_high_p8(poly8x16_t __a) { + return __builtin_shufflevector(__a, __a, 8, 9, 10, 11, 12, 13, 14, 15); } +__ai poly16x4_t vget_high_p16(poly16x8_t __a) { + return __builtin_shufflevector(__a, __a, 4, 5, 6, 7); } + +#define vget_lane_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); }) +#define vget_lane_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); }) +#define vget_lane_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32_t)__builtin_neon_vget_lane_i32((int32x2_t)__a, __b); }) +#define vget_lane_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8_t)__builtin_neon_vget_lane_i8(__a, __b); }) +#define vget_lane_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16_t)__builtin_neon_vget_lane_i16(__a, __b); }) +#define vget_lane_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32_t)__builtin_neon_vget_lane_i32(__a, __b); }) +#define vget_lane_p8(a, __b) __extension__ ({ \ + poly8x8_t __a = (a); \ + (poly8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); }) +#define vget_lane_p16(a, __b) __extension__ ({ \ + poly16x4_t __a = (a); \ + (poly16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); }) +#define vget_lane_f32(a, __b) 
__extension__ ({ \ + float32x2_t __a = (a); \ + (float32_t)__builtin_neon_vget_lane_f32(__a, __b); }) +#define vgetq_lane_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); }) +#define vgetq_lane_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); }) +#define vgetq_lane_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32_t)__builtin_neon_vgetq_lane_i32((int32x4_t)__a, __b); }) +#define vgetq_lane_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8_t)__builtin_neon_vgetq_lane_i8(__a, __b); }) +#define vgetq_lane_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16_t)__builtin_neon_vgetq_lane_i16(__a, __b); }) +#define vgetq_lane_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32_t)__builtin_neon_vgetq_lane_i32(__a, __b); }) +#define vgetq_lane_p8(a, __b) __extension__ ({ \ + poly8x16_t __a = (a); \ + (poly8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); }) +#define vgetq_lane_p16(a, __b) __extension__ ({ \ + poly16x8_t __a = (a); \ + (poly16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); }) +#define vgetq_lane_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + (float32_t)__builtin_neon_vgetq_lane_f32(__a, __b); }) +#define vget_lane_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64_t)__builtin_neon_vget_lane_i64(__a, __b); }) +#define vget_lane_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64_t)__builtin_neon_vget_lane_i64((int64x1_t)__a, __b); }) +#define vgetq_lane_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64_t)__builtin_neon_vgetq_lane_i64(__a, __b); }) +#define vgetq_lane_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64_t)__builtin_neon_vgetq_lane_i64((int64x2_t)__a, __b); }) + +__ai int8x8_t vget_low_s8(int8x16_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 
7); } +__ai int16x4_t vget_low_s16(int16x8_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +__ai int32x2_t vget_low_s32(int32x4_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1); } +__ai int64x1_t vget_low_s64(int64x2_t __a) { + return __builtin_shufflevector(__a, __a, 0); } +__ai float16x4_t vget_low_f16(float16x8_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +__ai float32x2_t vget_low_f32(float32x4_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1); } +__ai uint8x8_t vget_low_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } +__ai uint16x4_t vget_low_u16(uint16x8_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } +__ai uint32x2_t vget_low_u32(uint32x4_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1); } +__ai uint64x1_t vget_low_u64(uint64x2_t __a) { + return __builtin_shufflevector(__a, __a, 0); } +__ai poly8x8_t vget_low_p8(poly8x16_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } +__ai poly16x4_t vget_low_p16(poly16x8_t __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } + +__ai int8x8_t vhadd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vhadd_v(__a, __b, 0); } +__ai int16x4_t vhadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vhadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vhadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vhadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vhadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vhaddq_s8(int8x16_t 
__a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vhaddq_v(__a, __b, 32); } +__ai int16x8_t vhaddq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vhaddq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vhaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vhaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vhaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai int8x8_t vhsub_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vhsub_v(__a, __b, 0); } +__ai int16x4_t vhsub_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vhsub_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vhsub_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vhsub_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vhsub_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vhsubq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vhsubq_v(__a, __b, 32); } +__ai int16x8_t vhsubq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vhsubq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, 
(int8x16_t)__b, 34); } +__ai uint8x16_t vhsubq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vhsubq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vhsubq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +#define vld1q_u8(__a) __extension__ ({ \ + (uint8x16_t)__builtin_neon_vld1q_v(__a, 48); }) +#define vld1q_u16(__a) __extension__ ({ \ + (uint16x8_t)__builtin_neon_vld1q_v(__a, 49); }) +#define vld1q_u32(__a) __extension__ ({ \ + (uint32x4_t)__builtin_neon_vld1q_v(__a, 50); }) +#define vld1q_u64(__a) __extension__ ({ \ + (uint64x2_t)__builtin_neon_vld1q_v(__a, 51); }) +#define vld1q_s8(__a) __extension__ ({ \ + (int8x16_t)__builtin_neon_vld1q_v(__a, 32); }) +#define vld1q_s16(__a) __extension__ ({ \ + (int16x8_t)__builtin_neon_vld1q_v(__a, 33); }) +#define vld1q_s32(__a) __extension__ ({ \ + (int32x4_t)__builtin_neon_vld1q_v(__a, 34); }) +#define vld1q_s64(__a) __extension__ ({ \ + (int64x2_t)__builtin_neon_vld1q_v(__a, 35); }) +#define vld1q_f16(__a) __extension__ ({ \ + (float16x8_t)__builtin_neon_vld1q_v(__a, 38); }) +#define vld1q_f32(__a) __extension__ ({ \ + (float32x4_t)__builtin_neon_vld1q_v(__a, 39); }) +#define vld1q_p8(__a) __extension__ ({ \ + (poly8x16_t)__builtin_neon_vld1q_v(__a, 36); }) +#define vld1q_p16(__a) __extension__ ({ \ + (poly16x8_t)__builtin_neon_vld1q_v(__a, 37); }) +#define vld1_u8(__a) __extension__ ({ \ + (uint8x8_t)__builtin_neon_vld1_v(__a, 16); }) +#define vld1_u16(__a) __extension__ ({ \ + (uint16x4_t)__builtin_neon_vld1_v(__a, 17); }) +#define vld1_u32(__a) __extension__ ({ \ + (uint32x2_t)__builtin_neon_vld1_v(__a, 18); }) +#define vld1_u64(__a) __extension__ ({ \ + (uint64x1_t)__builtin_neon_vld1_v(__a, 19); }) +#define vld1_s8(__a) __extension__ ({ \ + 
(int8x8_t)__builtin_neon_vld1_v(__a, 0); }) +#define vld1_s16(__a) __extension__ ({ \ + (int16x4_t)__builtin_neon_vld1_v(__a, 1); }) +#define vld1_s32(__a) __extension__ ({ \ + (int32x2_t)__builtin_neon_vld1_v(__a, 2); }) +#define vld1_s64(__a) __extension__ ({ \ + (int64x1_t)__builtin_neon_vld1_v(__a, 3); }) +#define vld1_f16(__a) __extension__ ({ \ + (float16x4_t)__builtin_neon_vld1_v(__a, 6); }) +#define vld1_f32(__a) __extension__ ({ \ + (float32x2_t)__builtin_neon_vld1_v(__a, 7); }) +#define vld1_p8(__a) __extension__ ({ \ + (poly8x8_t)__builtin_neon_vld1_v(__a, 4); }) +#define vld1_p16(__a) __extension__ ({ \ + (poly16x4_t)__builtin_neon_vld1_v(__a, 5); }) + +#define vld1q_dup_u8(__a) __extension__ ({ \ + (uint8x16_t)__builtin_neon_vld1q_dup_v(__a, 48); }) +#define vld1q_dup_u16(__a) __extension__ ({ \ + (uint16x8_t)__builtin_neon_vld1q_dup_v(__a, 49); }) +#define vld1q_dup_u32(__a) __extension__ ({ \ + (uint32x4_t)__builtin_neon_vld1q_dup_v(__a, 50); }) +#define vld1q_dup_u64(__a) __extension__ ({ \ + (uint64x2_t)__builtin_neon_vld1q_dup_v(__a, 51); }) +#define vld1q_dup_s8(__a) __extension__ ({ \ + (int8x16_t)__builtin_neon_vld1q_dup_v(__a, 32); }) +#define vld1q_dup_s16(__a) __extension__ ({ \ + (int16x8_t)__builtin_neon_vld1q_dup_v(__a, 33); }) +#define vld1q_dup_s32(__a) __extension__ ({ \ + (int32x4_t)__builtin_neon_vld1q_dup_v(__a, 34); }) +#define vld1q_dup_s64(__a) __extension__ ({ \ + (int64x2_t)__builtin_neon_vld1q_dup_v(__a, 35); }) +#define vld1q_dup_f16(__a) __extension__ ({ \ + (float16x8_t)__builtin_neon_vld1q_dup_v(__a, 38); }) +#define vld1q_dup_f32(__a) __extension__ ({ \ + (float32x4_t)__builtin_neon_vld1q_dup_v(__a, 39); }) +#define vld1q_dup_p8(__a) __extension__ ({ \ + (poly8x16_t)__builtin_neon_vld1q_dup_v(__a, 36); }) +#define vld1q_dup_p16(__a) __extension__ ({ \ + (poly16x8_t)__builtin_neon_vld1q_dup_v(__a, 37); }) +#define vld1_dup_u8(__a) __extension__ ({ \ + (uint8x8_t)__builtin_neon_vld1_dup_v(__a, 16); }) +#define 
vld1_dup_u16(__a) __extension__ ({ \ + (uint16x4_t)__builtin_neon_vld1_dup_v(__a, 17); }) +#define vld1_dup_u32(__a) __extension__ ({ \ + (uint32x2_t)__builtin_neon_vld1_dup_v(__a, 18); }) +#define vld1_dup_u64(__a) __extension__ ({ \ + (uint64x1_t)__builtin_neon_vld1_dup_v(__a, 19); }) +#define vld1_dup_s8(__a) __extension__ ({ \ + (int8x8_t)__builtin_neon_vld1_dup_v(__a, 0); }) +#define vld1_dup_s16(__a) __extension__ ({ \ + (int16x4_t)__builtin_neon_vld1_dup_v(__a, 1); }) +#define vld1_dup_s32(__a) __extension__ ({ \ + (int32x2_t)__builtin_neon_vld1_dup_v(__a, 2); }) +#define vld1_dup_s64(__a) __extension__ ({ \ + (int64x1_t)__builtin_neon_vld1_dup_v(__a, 3); }) +#define vld1_dup_f16(__a) __extension__ ({ \ + (float16x4_t)__builtin_neon_vld1_dup_v(__a, 6); }) +#define vld1_dup_f32(__a) __extension__ ({ \ + (float32x2_t)__builtin_neon_vld1_dup_v(__a, 7); }) +#define vld1_dup_p8(__a) __extension__ ({ \ + (poly8x8_t)__builtin_neon_vld1_dup_v(__a, 4); }) +#define vld1_dup_p16(__a) __extension__ ({ \ + (poly16x4_t)__builtin_neon_vld1_dup_v(__a, 5); }) + +#define vld1q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 48); }) +#define vld1q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 49); }) +#define vld1q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 50); }) +#define vld1q_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 51); }) +#define vld1q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16_t __b = (b); \ + (int8x16_t)__builtin_neon_vld1q_lane_v(__a, __b, __c, 32); }) +#define vld1q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8_t __b = (b); \ + (int16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 33); }) 
+#define vld1q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4_t __b = (b); \ + (int32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 34); }) +#define vld1q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2_t __b = (b); \ + (int64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 35); }) +#define vld1q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8_t __b = (b); \ + (float16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 38); }) +#define vld1q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4_t __b = (b); \ + (float32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 39); }) +#define vld1q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 36); }) +#define vld1q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 37); }) +#define vld1_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 16); }) +#define vld1_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 17); }) +#define vld1_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 18); }) +#define vld1_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 19); }) +#define vld1_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8_t __b = (b); \ + (int8x8_t)__builtin_neon_vld1_lane_v(__a, __b, __c, 0); }) +#define vld1_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4_t __b = (b); \ + (int16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 1); }) +#define vld1_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2_t __b = (b); \ + (int32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 
2); }) +#define vld1_lane_s64(__a, b, __c) __extension__ ({ \ + int64x1_t __b = (b); \ + (int64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 3); }) +#define vld1_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4_t __b = (b); \ + (float16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 6); }) +#define vld1_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2_t __b = (b); \ + (float32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 7); }) +#define vld1_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 4); }) +#define vld1_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 5); }) + +#define vld2q_u8(__a) __extension__ ({ \ + uint8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 48); r; }) +#define vld2q_u16(__a) __extension__ ({ \ + uint16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 49); r; }) +#define vld2q_u32(__a) __extension__ ({ \ + uint32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 50); r; }) +#define vld2q_s8(__a) __extension__ ({ \ + int8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 32); r; }) +#define vld2q_s16(__a) __extension__ ({ \ + int16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 33); r; }) +#define vld2q_s32(__a) __extension__ ({ \ + int32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 34); r; }) +#define vld2q_f16(__a) __extension__ ({ \ + float16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 38); r; }) +#define vld2q_f32(__a) __extension__ ({ \ + float32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 39); r; }) +#define vld2q_p8(__a) __extension__ ({ \ + poly8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 36); r; }) +#define vld2q_p16(__a) __extension__ ({ \ + poly16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 37); r; }) +#define vld2_u8(__a) __extension__ ({ \ + uint8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 16); r; }) +#define vld2_u16(__a) __extension__ ({ \ + uint16x4x2_t r; __builtin_neon_vld2_v(&r, 
__a, 17); r; }) +#define vld2_u32(__a) __extension__ ({ \ + uint32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 18); r; }) +#define vld2_u64(__a) __extension__ ({ \ + uint64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 19); r; }) +#define vld2_s8(__a) __extension__ ({ \ + int8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 0); r; }) +#define vld2_s16(__a) __extension__ ({ \ + int16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 1); r; }) +#define vld2_s32(__a) __extension__ ({ \ + int32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 2); r; }) +#define vld2_s64(__a) __extension__ ({ \ + int64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 3); r; }) +#define vld2_f16(__a) __extension__ ({ \ + float16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 6); r; }) +#define vld2_f32(__a) __extension__ ({ \ + float32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 7); r; }) +#define vld2_p8(__a) __extension__ ({ \ + poly8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 4); r; }) +#define vld2_p16(__a) __extension__ ({ \ + poly16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 5); r; }) + +#define vld2_dup_u8(__a) __extension__ ({ \ + uint8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 16); r; }) +#define vld2_dup_u16(__a) __extension__ ({ \ + uint16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 17); r; }) +#define vld2_dup_u32(__a) __extension__ ({ \ + uint32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 18); r; }) +#define vld2_dup_u64(__a) __extension__ ({ \ + uint64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 19); r; }) +#define vld2_dup_s8(__a) __extension__ ({ \ + int8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 0); r; }) +#define vld2_dup_s16(__a) __extension__ ({ \ + int16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 1); r; }) +#define vld2_dup_s32(__a) __extension__ ({ \ + int32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 2); r; }) +#define vld2_dup_s64(__a) __extension__ ({ \ + int64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 3); r; }) +#define vld2_dup_f16(__a) __extension__ ({ \ + float16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 6); r; 
}) +#define vld2_dup_f32(__a) __extension__ ({ \ + float32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 7); r; }) +#define vld2_dup_p8(__a) __extension__ ({ \ + poly8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 4); r; }) +#define vld2_dup_p16(__a) __extension__ ({ \ + poly16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 5); r; }) + +#define vld2q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x2_t __b = (b); \ + uint16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); r; }) +#define vld2q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x2_t __b = (b); \ + uint32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); r; }) +#define vld2q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x2_t __b = (b); \ + int16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); r; }) +#define vld2q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x2_t __b = (b); \ + int32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); r; }) +#define vld2q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x2_t __b = (b); \ + float16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); r; }) +#define vld2q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x2_t __b = (b); \ + float32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); r; }) +#define vld2q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x2_t __b = (b); \ + poly16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); r; }) +#define vld2_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x2_t __b = (b); \ + uint8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); r; }) +#define vld2_lane_u16(__a, b, __c) __extension__ ({ \ + 
uint16x4x2_t __b = (b); \ + uint16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); r; }) +#define vld2_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x2_t __b = (b); \ + uint32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); r; }) +#define vld2_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x2_t __b = (b); \ + int8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, __b.val[0], __b.val[1], __c, 0); r; }) +#define vld2_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x2_t __b = (b); \ + int16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); r; }) +#define vld2_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x2_t __b = (b); \ + int32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); r; }) +#define vld2_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x2_t __b = (b); \ + float16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); r; }) +#define vld2_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x2_t __b = (b); \ + float32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); r; }) +#define vld2_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x2_t __b = (b); \ + poly8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); r; }) +#define vld2_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x2_t __b = (b); \ + poly16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); r; }) + +#define vld3q_u8(__a) __extension__ ({ \ + uint8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 48); r; }) +#define vld3q_u16(__a) __extension__ ({ \ + uint16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 49); r; }) +#define vld3q_u32(__a) __extension__ ({ \ + uint32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 50); r; }) 
+/* vld3q_<t> / vld3_<t>(__a)
+ * Declares a three-vector result struct, hands its address together with the
+ * pointer __a to __builtin_neon_vld3q_v / __builtin_neon_vld3_v, and yields
+ * the struct as the value of the statement expression.
+ * The temporary is named __ret (reserved namespace, like __a/__b/__c) and
+ * not a plain `r`: __a is expanded after the temporary is declared, so a
+ * caller-side pointer that happened to be called `r` would otherwise be
+ * shadowed by the still-uninitialized result struct.
+ * NOTE(review): the trailing integer constant differs per element type; it is
+ * presumably clang's NEON type code selecting the variant -- confirm against
+ * the builtin definitions. */
+#define vld3q_s8(__a) __extension__ ({ \
+  int8x16x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 32); __ret; })
+#define vld3q_s16(__a) __extension__ ({ \
+  int16x8x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 33); __ret; })
+#define vld3q_s32(__a) __extension__ ({ \
+  int32x4x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 34); __ret; })
+#define vld3q_f16(__a) __extension__ ({ \
+  float16x8x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 38); __ret; })
+#define vld3q_f32(__a) __extension__ ({ \
+  float32x4x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 39); __ret; })
+#define vld3q_p8(__a) __extension__ ({ \
+  poly8x16x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 36); __ret; })
+#define vld3q_p16(__a) __extension__ ({ \
+  poly16x8x3_t __ret; __builtin_neon_vld3q_v(&__ret, __a, 37); __ret; })
+#define vld3_u8(__a) __extension__ ({ \
+  uint8x8x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 16); __ret; })
+#define vld3_u16(__a) __extension__ ({ \
+  uint16x4x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 17); __ret; })
+#define vld3_u32(__a) __extension__ ({ \
+  uint32x2x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 18); __ret; })
+#define vld3_u64(__a) __extension__ ({ \
+  uint64x1x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 19); __ret; })
+#define vld3_s8(__a) __extension__ ({ \
+  int8x8x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 0); __ret; })
+#define vld3_s16(__a) __extension__ ({ \
+  int16x4x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 1); __ret; })
+#define vld3_s32(__a) __extension__ ({ \
+  int32x2x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 2); __ret; })
+#define vld3_s64(__a) __extension__ ({ \
+  int64x1x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 3); __ret; })
+#define vld3_f16(__a) __extension__ ({ \
+  float16x4x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 6); __ret; })
+#define vld3_f32(__a) __extension__ ({ \
+  float32x2x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 7); __ret; })
+#define vld3_p8(__a) __extension__ ({ \
+  poly8x8x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 4); __ret; })
+#define vld3_p16(__a) __extension__ ({ \
+  poly16x4x3_t __ret; __builtin_neon_vld3_v(&__ret, __a, 5); __ret; })
+
+/* vld3_dup_<t>(__a)
+ * Same shape as vld3_<t>, but routed through __builtin_neon_vld3_dup_v. */
+#define vld3_dup_u8(__a) __extension__ ({ \
+  uint8x8x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 16); __ret; })
+#define vld3_dup_u16(__a) __extension__ ({ \
+  uint16x4x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 17); __ret; })
+#define vld3_dup_u32(__a) __extension__ ({ \
+  uint32x2x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 18); __ret; })
+#define vld3_dup_u64(__a) __extension__ ({ \
+  uint64x1x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 19); __ret; })
+#define vld3_dup_s8(__a) __extension__ ({ \
+  int8x8x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 0); __ret; })
+#define vld3_dup_s16(__a) __extension__ ({ \
+  int16x4x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 1); __ret; })
+#define vld3_dup_s32(__a) __extension__ ({ \
+  int32x2x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 2); __ret; })
+#define vld3_dup_s64(__a) __extension__ ({ \
+  int64x1x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 3); __ret; })
+#define vld3_dup_f16(__a) __extension__ ({ \
+  float16x4x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 6); __ret; })
+#define vld3_dup_f32(__a) __extension__ ({ \
+  float32x2x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 7); __ret; })
+#define vld3_dup_p8(__a) __extension__ ({ \
+  poly8x8x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 4); __ret; })
+#define vld3_dup_p16(__a) __extension__ ({ \
+  poly16x4x3_t __ret; __builtin_neon_vld3_dup_v(&__ret, __a, 5); __ret; })
+
+/* vld3q_lane_<t> / vld3_lane_<t>(__a, b, __c)
+ * Copies the three-vector struct b into __b, then passes the result address,
+ * the pointer __a, all three member vectors of __b (cast to int8x16_t /
+ * int8x8_t unless they already have that type) and the lane index __c to
+ * __builtin_neon_vld3q_lane_v / __builtin_neon_vld3_lane_v.
+ * b is evaluated before __ret exists, but __a and __c are expanded after it,
+ * hence the reserved name for the temporary. */
+#define vld3q_lane_u16(__a, b, __c) __extension__ ({ \
+  uint16x8x3_t __b = (b); \
+  uint16x8x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); __ret; })
+#define vld3q_lane_u32(__a, b, __c) __extension__ ({ \
+  uint32x4x3_t __b = (b); \
+  uint32x4x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); __ret; })
+#define vld3q_lane_s16(__a, b, __c) __extension__ ({ \
+  int16x8x3_t __b = (b); \
+  int16x8x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); __ret; })
+#define vld3q_lane_s32(__a, b, __c) __extension__ ({ \
+  int32x4x3_t __b = (b); \
+  int32x4x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); __ret; })
+#define vld3q_lane_f16(__a, b, __c) __extension__ ({ \
+  float16x8x3_t __b = (b); \
+  float16x8x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); __ret; })
+#define vld3q_lane_f32(__a, b, __c) __extension__ ({ \
+  float32x4x3_t __b = (b); \
+  float32x4x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); __ret; })
+#define vld3q_lane_p16(__a, b, __c) __extension__ ({ \
+  poly16x8x3_t __b = (b); \
+  poly16x8x3_t __ret; __builtin_neon_vld3q_lane_v(&__ret, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); __ret; })
+#define vld3_lane_u8(__a, b, __c) __extension__ ({ \
+  uint8x8x3_t __b = (b); \
+  uint8x8x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); __ret; })
+#define vld3_lane_u16(__a, b, __c) __extension__ ({ \
+  uint16x4x3_t __b = (b); \
+  uint16x4x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); __ret; })
+#define vld3_lane_u32(__a, b, __c) __extension__ ({ \
+  uint32x2x3_t __b = (b); \
+  uint32x2x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); __ret; })
+#define vld3_lane_s8(__a, b, __c) __extension__ ({ \
+  int8x8x3_t __b = (b); \
+  int8x8x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, __b.val[0], __b.val[1], __b.val[2], __c, 0); __ret; })
+#define vld3_lane_s16(__a, b, __c) __extension__ ({ \
+  int16x4x3_t __b = (b); \
+  int16x4x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); __ret; })
+#define vld3_lane_s32(__a, b, __c) __extension__ ({ \
+  int32x2x3_t __b = (b); \
+  int32x2x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); __ret; })
+#define vld3_lane_f16(__a, b, __c) __extension__ ({ \
+  float16x4x3_t __b = (b); \
+  float16x4x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); __ret; })
+#define vld3_lane_f32(__a, b, __c) __extension__ ({ \
+  float32x2x3_t __b = (b); \
+  float32x2x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); __ret; })
+#define vld3_lane_p8(__a, b, __c) __extension__ ({ \
+  poly8x8x3_t __b = (b); \
+  poly8x8x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); __ret; })
+#define vld3_lane_p16(__a, b, __c) __extension__ ({ \
+  poly16x4x3_t __b = (b); \
+  poly16x4x3_t __ret; __builtin_neon_vld3_lane_v(&__ret, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); __ret; })
+
+/* vld4q_<t>(__a): four-vector counterpart of vld3q_<t>, calling
+ * __builtin_neon_vld4q_v; the temporary is __ret for the same shadowing
+ * reason. */
+#define vld4q_u8(__a) __extension__ ({ \
+  uint8x16x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 48); __ret; })
+#define vld4q_u16(__a) __extension__ ({ \
+  uint16x8x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 49); __ret; })
+#define vld4q_u32(__a) __extension__ ({ \
+  uint32x4x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 50); __ret; })
+#define vld4q_s8(__a) __extension__ ({ \
+  int8x16x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 32); __ret; })
+#define vld4q_s16(__a) __extension__ ({ \
+  int16x8x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 33); __ret; })
+#define vld4q_s32(__a) __extension__ ({ \
+  int32x4x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 34); __ret; })
+#define vld4q_f16(__a) __extension__ ({ \
+  float16x8x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 38); __ret; })
+#define vld4q_f32(__a) __extension__ ({ \
+  float32x4x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 39); __ret; })
+#define vld4q_p8(__a) __extension__ ({ \
+  poly8x16x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 36); __ret; })
+#define vld4q_p16(__a) __extension__ ({ \
+  poly16x8x4_t __ret; __builtin_neon_vld4q_v(&__ret, __a, 37); __ret; })
+#define vld4_u8(__a) __extension__ ({ \ + uint8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 16); r; }) +#define vld4_u16(__a) __extension__ ({ \ + uint16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 17); r; }) +#define vld4_u32(__a) __extension__ ({ \ + uint32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 18); r; }) +#define vld4_u64(__a) __extension__ ({ \ + uint64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 19); r; }) +#define vld4_s8(__a) __extension__ ({ \ + int8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 0); r; }) +#define vld4_s16(__a) __extension__ ({ \ + int16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 1); r; }) +#define vld4_s32(__a) __extension__ ({ \ + int32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 2); r; }) +#define vld4_s64(__a) __extension__ ({ \ + int64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 3); r; }) +#define vld4_f16(__a) __extension__ ({ \ + float16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 6); r; }) +#define vld4_f32(__a) __extension__ ({ \ + float32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 7); r; }) +#define vld4_p8(__a) __extension__ ({ \ + poly8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 4); r; }) +#define vld4_p16(__a) __extension__ ({ \ + poly16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 5); r; }) + +#define vld4_dup_u8(__a) __extension__ ({ \ + uint8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 16); r; }) +#define vld4_dup_u16(__a) __extension__ ({ \ + uint16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 17); r; }) +#define vld4_dup_u32(__a) __extension__ ({ \ + uint32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 18); r; }) +#define vld4_dup_u64(__a) __extension__ ({ \ + uint64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 19); r; }) +#define vld4_dup_s8(__a) __extension__ ({ \ + int8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 0); r; }) +#define vld4_dup_s16(__a) __extension__ ({ \ + int16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 1); r; }) +#define vld4_dup_s32(__a) __extension__ ({ \ + int32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 2); r; }) +#define vld4_dup_s64(__a) 
__extension__ ({ \ + int64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 3); r; }) +#define vld4_dup_f16(__a) __extension__ ({ \ + float16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 6); r; }) +#define vld4_dup_f32(__a) __extension__ ({ \ + float32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 7); r; }) +#define vld4_dup_p8(__a) __extension__ ({ \ + poly8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 4); r; }) +#define vld4_dup_p16(__a) __extension__ ({ \ + poly16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 5); r; }) + +#define vld4q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x4_t __b = (b); \ + uint16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); r; }) +#define vld4q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x4_t __b = (b); \ + uint32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); r; }) +#define vld4q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x4_t __b = (b); \ + int16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); r; }) +#define vld4q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x4_t __b = (b); \ + int32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); r; }) +#define vld4q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x4_t __b = (b); \ + float16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); r; }) +#define vld4q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x4_t __b = (b); \ + float32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); r; 
}) +#define vld4q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x4_t __b = (b); \ + poly16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); r; }) +#define vld4_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x4_t __b = (b); \ + uint8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); r; }) +#define vld4_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x4_t __b = (b); \ + uint16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); r; }) +#define vld4_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x4_t __b = (b); \ + uint32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); r; }) +#define vld4_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x4_t __b = (b); \ + int8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); r; }) +#define vld4_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x4_t __b = (b); \ + int16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); r; }) +#define vld4_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x4_t __b = (b); \ + int32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); r; }) +#define vld4_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x4_t __b = (b); \ + float16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); r; }) +#define vld4_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x4_t __b = (b); \ + float32x2x4_t r; 
__builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); r; }) +#define vld4_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x4_t __b = (b); \ + poly8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); r; }) +#define vld4_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x4_t __b = (b); \ + poly16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); r; }) + +__ai int8x8_t vmax_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vmax_v(__a, __b, 0); } +__ai int16x4_t vmax_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vmax_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vmax_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vmax_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vmax_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vmax_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vmaxq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vmaxq_v(__a, __b, 32); } +__ai int16x8_t vmaxq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vmaxq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vmaxq_u8(uint8x16_t __a, 
uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vmaxq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vmaxq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vmaxq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 39); } + +__ai int8x8_t vmin_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vmin_v(__a, __b, 0); } +__ai int16x4_t vmin_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vmin_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vmin_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vmin_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vmin_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vmin_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai int8x16_t vminq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vminq_v(__a, __b, 32); } +__ai int16x8_t vminq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vminq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vminq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vminq_v((int8x16_t)__a, 
(int8x16_t)__b, 48); } +__ai uint16x8_t vminq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vminq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vminq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 39); } + +__ai int8x8_t vmla_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a + (__b * __c); } +__ai int16x4_t vmla_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a + (__b * __c); } +__ai int32x2_t vmla_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a + (__b * __c); } +__ai float32x2_t vmla_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { + return __a + (__b * __c); } +__ai uint8x8_t vmla_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a + (__b * __c); } +__ai uint16x4_t vmla_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a + (__b * __c); } +__ai uint32x2_t vmla_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a + (__b * __c); } +__ai int8x16_t vmlaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { + return __a + (__b * __c); } +__ai int16x8_t vmlaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { + return __a + (__b * __c); } +__ai int32x4_t vmlaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { + return __a + (__b * __c); } +__ai float32x4_t vmlaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { + return __a + (__b * __c); } +__ai uint8x16_t vmlaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return __a + (__b * __c); } +__ai uint16x8_t vmlaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return __a + (__b * __c); } +__ai uint32x4_t vmlaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return __a + (__b * __c); } + +__ai int16x8_t vmlal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { + 
return __a + vmull_s8(__b, __c); } +__ai int32x4_t vmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a + vmull_s16(__b, __c); } +__ai int64x2_t vmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a + vmull_s32(__b, __c); } +__ai uint16x8_t vmlal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a + vmull_u8(__b, __c); } +__ai uint32x4_t vmlal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a + vmull_u16(__b, __c); } +__ai uint64x2_t vmlal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a + vmull_u32(__b, __c); } + +#define vmlal_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + __a + vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + __a + vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlal_lane_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ + __a + vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_lane_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ + __a + vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { + return __a + vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); } +__ai int64x2_t vmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { + return __a + vmull_s32(__b, (int32x2_t){ __c, __c }); } +__ai uint32x4_t vmlal_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) { + return __a + vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); } +__ai uint64x2_t vmlal_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) { + return __a + vmull_u32(__b, (uint32x2_t){ __c, __c }); } + +#define vmla_lane_s16(a, b, c, 
__d) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmla_lane_s32(a, b, c, __d) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmla_lane_u16(a, b, c, __d) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmla_lane_u32(a, b, c, __d) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmla_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlaq_lane_s16(a, b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlaq_lane_s32(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlaq_lane_u16(a, b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlaq_lane_u32(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlaq_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +__ai int16x4_t 
vmla_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) { + return __a + (__b * (int16x4_t){ __c, __c, __c, __c }); } +__ai int32x2_t vmla_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) { + return __a + (__b * (int32x2_t){ __c, __c }); } +__ai uint16x4_t vmla_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) { + return __a + (__b * (uint16x4_t){ __c, __c, __c, __c }); } +__ai uint32x2_t vmla_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) { + return __a + (__b * (uint32x2_t){ __c, __c }); } +__ai float32x2_t vmla_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { + return __a + (__b * (float32x2_t){ __c, __c }); } +__ai int16x8_t vmlaq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) { + return __a + (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai int32x4_t vmlaq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) { + return __a + (__b * (int32x4_t){ __c, __c, __c, __c }); } +__ai uint16x8_t vmlaq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) { + return __a + (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai uint32x4_t vmlaq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { + return __a + (__b * (uint32x4_t){ __c, __c, __c, __c }); } +__ai float32x4_t vmlaq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { + return __a + (__b * (float32x4_t){ __c, __c, __c, __c }); } + +__ai int8x8_t vmls_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a - (__b * __c); } +__ai int16x4_t vmls_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a - (__b * __c); } +__ai int32x2_t vmls_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a - (__b * __c); } +__ai float32x2_t vmls_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { + return __a - (__b * __c); } +__ai uint8x8_t vmls_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a - (__b * __c); } +__ai uint16x4_t vmls_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a - (__b * __c); } +__ai uint32x2_t 
vmls_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a - (__b * __c); } +__ai int8x16_t vmlsq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { + return __a - (__b * __c); } +__ai int16x8_t vmlsq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { + return __a - (__b * __c); } +__ai int32x4_t vmlsq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { + return __a - (__b * __c); } +__ai float32x4_t vmlsq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { + return __a - (__b * __c); } +__ai uint8x16_t vmlsq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { + return __a - (__b * __c); } +__ai uint16x8_t vmlsq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return __a - (__b * __c); } +__ai uint32x4_t vmlsq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return __a - (__b * __c); } + +__ai int16x8_t vmlsl_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { + return __a - vmull_s8(__b, __c); } +__ai int32x4_t vmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return __a - vmull_s16(__b, __c); } +__ai int64x2_t vmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return __a - vmull_s32(__b, __c); } +__ai uint16x8_t vmlsl_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { + return __a - vmull_u8(__b, __c); } +__ai uint32x4_t vmlsl_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { + return __a - vmull_u16(__b, __c); } +__ai uint64x2_t vmlsl_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { + return __a - vmull_u32(__b, __c); } + +#define vmlsl_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + __a - vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + __a - vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsl_lane_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x4_t __b 
= (b); uint16x4_t __c = (c); \ + __a - vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_lane_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ + __a - vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { + return __a - vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); } +__ai int64x2_t vmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { + return __a - vmull_s32(__b, (int32x2_t){ __c, __c }); } +__ai uint32x4_t vmlsl_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) { + return __a - vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); } +__ai uint64x2_t vmlsl_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) { + return __a - vmull_u32(__b, (uint32x2_t){ __c, __c }); } + +#define vmls_lane_s16(a, b, c, __d) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmls_lane_s32(a, b, c, __d) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmls_lane_u16(a, b, c, __d) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmls_lane_u32(a, b, c, __d) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmls_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsq_lane_s16(a, b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + __a - (__b * 
__builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsq_lane_s32(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsq_lane_u16(a, b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsq_lane_u32(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsq_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + +__ai int16x4_t vmls_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) { + return __a - (__b * (int16x4_t){ __c, __c, __c, __c }); } +__ai int32x2_t vmls_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) { + return __a - (__b * (int32x2_t){ __c, __c }); } +__ai uint16x4_t vmls_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) { + return __a - (__b * (uint16x4_t){ __c, __c, __c, __c }); } +__ai uint32x2_t vmls_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) { + return __a - (__b * (uint32x2_t){ __c, __c }); } +__ai float32x2_t vmls_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { + return __a - (__b * (float32x2_t){ __c, __c }); } +__ai int16x8_t vmlsq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) { + return __a - (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } +__ai int32x4_t vmlsq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) { + return __a - (__b * (int32x4_t){ __c, __c, __c, __c }); } +__ai uint16x8_t vmlsq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) { + return __a - (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } 
+__ai uint32x4_t vmlsq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { /* vmlsq_n_*: returns __a - (__b * v), where v is a vector literal with the scalar __c repeated in every lane. */ + return __a - (__b * (uint32x4_t){ __c, __c, __c, __c }); } +__ai float32x4_t vmlsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { + return __a - (__b * (float32x4_t){ __c, __c, __c, __c }); } + +__ai int8x8_t vmovn_s16(int16x8_t __a) { /* vmovn_*: each variant reinterprets its operand as int8x16_t, calls __builtin_neon_vmovn_v, and casts the result to the declared return type. The trailing constant differs per variant (0, 1, 2 for the signed results; 16, 17, 18 for the unsigned ones); presumably a type code consumed by the builtin -- TODO confirm against the Clang NEON builtin definitions. */ + return (int8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 0); } +__ai int16x4_t vmovn_s32(int32x4_t __a) { + return (int16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 1); } +__ai int32x2_t vmovn_s64(int64x2_t __a) { + return (int32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 2); } +__ai uint8x8_t vmovn_u16(uint16x8_t __a) { + return (uint8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 16); } +__ai uint16x4_t vmovn_u32(uint32x4_t __a) { + return (uint16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 17); } +__ai uint32x2_t vmovn_u64(uint64x2_t __a) { + return (uint32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 18); } + +__ai uint8x8_t vmov_n_u8(uint8_t __a) { /* vmov_n_* and vmovq_n_*: return a vector literal with the scalar __a repeated in every lane; no builtin is involved. */ + return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint16x4_t vmov_n_u16(uint16_t __a) { + return (uint16x4_t){ __a, __a, __a, __a }; } +__ai uint32x2_t vmov_n_u32(uint32_t __a) { + return (uint32x2_t){ __a, __a }; } +__ai int8x8_t vmov_n_s8(int8_t __a) { + return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int16x4_t vmov_n_s16(int16_t __a) { + return (int16x4_t){ __a, __a, __a, __a }; } +__ai int32x2_t vmov_n_s32(int32_t __a) { + return (int32x2_t){ __a, __a }; } +__ai poly8x8_t vmov_n_p8(poly8_t __a) { + return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly16x4_t vmov_n_p16(poly16_t __a) { + return (poly16x4_t){ __a, __a, __a, __a }; } +__ai float32x2_t vmov_n_f32(float32_t __a) { + return (float32x2_t){ __a, __a }; } +__ai uint8x16_t vmovq_n_u8(uint8_t __a) { + return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai uint16x8_t vmovq_n_u16(uint16_t __a) { + return (uint16x8_t){ __a, __a, 
__a, __a, __a, __a, __a, __a }; } +__ai uint32x4_t vmovq_n_u32(uint32_t __a) { + return (uint32x4_t){ __a, __a, __a, __a }; } +__ai int8x16_t vmovq_n_s8(int8_t __a) { + return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int16x8_t vmovq_n_s16(int16_t __a) { + return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai int32x4_t vmovq_n_s32(int32_t __a) { + return (int32x4_t){ __a, __a, __a, __a }; } +__ai poly8x16_t vmovq_n_p8(poly8_t __a) { + return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai poly16x8_t vmovq_n_p16(poly16_t __a) { + return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } +__ai float32x4_t vmovq_n_f32(float32_t __a) { + return (float32x4_t){ __a, __a, __a, __a }; } +__ai int64x1_t vmov_n_s64(int64_t __a) { /* One-element result: the literal holds just __a. */ + return (int64x1_t){ __a }; } +__ai uint64x1_t vmov_n_u64(uint64_t __a) { + return (uint64x1_t){ __a }; } +__ai int64x2_t vmovq_n_s64(int64_t __a) { + return (int64x2_t){ __a, __a }; } +__ai uint64x2_t vmovq_n_u64(uint64_t __a) { + return (uint64x2_t){ __a, __a }; } + +__ai int8x8_t vmul_s8(int8x8_t __a, int8x8_t __b) { /* vmul_* and vmulq_* (integer and float variants): implemented with the binary * operator applied to the two vector operands; no builtin call. */ + return __a * __b; } +__ai int16x4_t vmul_s16(int16x4_t __a, int16x4_t __b) { + return __a * __b; } +__ai int32x2_t vmul_s32(int32x2_t __a, int32x2_t __b) { + return __a * __b; } +__ai float32x2_t vmul_f32(float32x2_t __a, float32x2_t __b) { + return __a * __b; } +__ai uint8x8_t vmul_u8(uint8x8_t __a, uint8x8_t __b) { + return __a * __b; } +__ai uint16x4_t vmul_u16(uint16x4_t __a, uint16x4_t __b) { + return __a * __b; } +__ai uint32x2_t vmul_u32(uint32x2_t __a, uint32x2_t __b) { + return __a * __b; } +__ai int8x16_t vmulq_s8(int8x16_t __a, int8x16_t __b) { + return __a * __b; } +__ai int16x8_t vmulq_s16(int16x8_t __a, int16x8_t __b) { + return __a * __b; } +__ai int32x4_t vmulq_s32(int32x4_t __a, int32x4_t __b) { + return __a * __b; } +__ai float32x4_t vmulq_f32(float32x4_t __a, float32x4_t 
__b) { + return __a * __b; } +__ai uint8x16_t vmulq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a * __b; } +__ai uint16x8_t vmulq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a * __b; } +__ai uint32x4_t vmulq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a * __b; } + +#define vmull_lane_s16(a, b, __c) __extension__ ({ /* vmull_lane_*: statement-expression macros. a and b are each evaluated once into typed locals; __c is pasted directly into the __builtin_shufflevector index list, so the selected element of __b is repeated in every lane before the call to the matching vmull_* function. NOTE(review): because __c lands in the index list it presumably must be a compile-time constant -- confirm. */ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + vmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmull_lane_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_lane_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + +__ai int32x4_t vmull_n_s16(int16x4_t __a, int16_t __b) { /* vmull_n_*: pass __a and a vector literal of repeated __b to __builtin_neon_vmull_v, both reinterpreted as int8x8_t. The trailing constant varies with the variant (34, 35 signed; 50, 51 unsigned); presumably a type code -- TODO confirm. */ + return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); } +__ai int64x2_t vmull_n_s32(int32x2_t __a, int32_t __b) { + return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); } +__ai uint32x4_t vmull_n_u16(uint16x4_t __a, uint16_t __b) { + return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint16x4_t){ __b, __b, __b, __b }, 50); } +__ai uint64x2_t vmull_n_u32(uint32x2_t __a, uint32_t __b) { + return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint32x2_t){ __b, __b }, 51); } + +__ai poly8x8_t vmul_p8(poly8x8_t __a, poly8x8_t __b) { /* The polynomial variants call __builtin_neon_vmul_v and __builtin_neon_vmulq_v instead of using the * operator like the integer and float vmul_* functions. */ + return (poly8x8_t)__builtin_neon_vmul_v((int8x8_t)__a, (int8x8_t)__b, 4); } +__ai poly8x16_t vmulq_p8(poly8x16_t __a, poly8x16_t __b) { + return (poly8x16_t)__builtin_neon_vmulq_v((int8x16_t)__a, (int8x16_t)__b, 36); } + +#define vmul_lane_s16(a, b, __c) __extension__ ({ /* vmul_lane_* and vmulq_lane_*: multiply __a by a vector built from element __c of __b repeated in every lane. In the q forms __a is the wider vector while __b keeps the narrower type, and the shuffle index list is long enough to fill the wider result. */ \ + int16x4_t __a = (a); int16x4_t 
__b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmul_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmul_lane_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmul_lane_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmul_lane_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmulq_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) +#define vmulq_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_lane_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_lane_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) +#define vmulq_lane_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) + +__ai int16x4_t vmul_n_s16(int16x4_t __a, int16_t __b) { /* vmul_n_* and vmulq_n_*: multiply __a by a vector literal with the scalar __b in every lane. */ + return __a * (int16x4_t){ __b, __b, __b, __b }; } +__ai int32x2_t vmul_n_s32(int32x2_t __a, int32_t __b) { + return __a * (int32x2_t){ __b, __b }; } +__ai float32x2_t vmul_n_f32(float32x2_t __a, float32_t __b) { + return __a * (float32x2_t){ __b, __b }; } +__ai uint16x4_t vmul_n_u16(uint16x4_t __a, uint16_t __b) { + return __a * 
(uint16x4_t){ __b, __b, __b, __b }; } +__ai uint32x2_t vmul_n_u32(uint32x2_t __a, uint32_t __b) { + return __a * (uint32x2_t){ __b, __b }; } +__ai int16x8_t vmulq_n_s16(int16x8_t __a, int16_t __b) { + return __a * (int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; } +__ai int32x4_t vmulq_n_s32(int32x4_t __a, int32_t __b) { + return __a * (int32x4_t){ __b, __b, __b, __b }; } +__ai float32x4_t vmulq_n_f32(float32x4_t __a, float32_t __b) { + return __a * (float32x4_t){ __b, __b, __b, __b }; } +__ai uint16x8_t vmulq_n_u16(uint16x8_t __a, uint16_t __b) { + return __a * (uint16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; } +__ai uint32x4_t vmulq_n_u32(uint32x4_t __a, uint32_t __b) { + return __a * (uint32x4_t){ __b, __b, __b, __b }; } + +__ai int8x8_t vmvn_s8(int8x8_t __a) { /* vmvn_* and vmvnq_*: apply the ~ operator to the vector operand. */ + return ~__a; } +__ai int16x4_t vmvn_s16(int16x4_t __a) { + return ~__a; } +__ai int32x2_t vmvn_s32(int32x2_t __a) { + return ~__a; } +__ai uint8x8_t vmvn_u8(uint8x8_t __a) { + return ~__a; } +__ai uint16x4_t vmvn_u16(uint16x4_t __a) { + return ~__a; } +__ai uint32x2_t vmvn_u32(uint32x2_t __a) { + return ~__a; } +__ai poly8x8_t vmvn_p8(poly8x8_t __a) { + return ~__a; } +__ai int8x16_t vmvnq_s8(int8x16_t __a) { + return ~__a; } +__ai int16x8_t vmvnq_s16(int16x8_t __a) { + return ~__a; } +__ai int32x4_t vmvnq_s32(int32x4_t __a) { + return ~__a; } +__ai uint8x16_t vmvnq_u8(uint8x16_t __a) { + return ~__a; } +__ai uint16x8_t vmvnq_u16(uint16x8_t __a) { + return ~__a; } +__ai uint32x4_t vmvnq_u32(uint32x4_t __a) { + return ~__a; } +__ai poly8x16_t vmvnq_p8(poly8x16_t __a) { + return ~__a; } + +__ai int8x8_t vneg_s8(int8x8_t __a) { /* vneg_* and vnegq_*: apply unary minus to the vector operand. */ + return -__a; } +__ai int16x4_t vneg_s16(int16x4_t __a) { + return -__a; } +__ai int32x2_t vneg_s32(int32x2_t __a) { + return -__a; } +__ai float32x2_t vneg_f32(float32x2_t __a) { + return -__a; } +__ai int8x16_t vnegq_s8(int8x16_t __a) { + return -__a; } +__ai int16x8_t vnegq_s16(int16x8_t __a) { + return -__a; } +__ai int32x4_t vnegq_s32(int32x4_t __a) { + return 
-__a; } +__ai float32x4_t vnegq_f32(float32x4_t __a) { + return -__a; } + +__ai int8x8_t vorn_s8(int8x8_t __a, int8x8_t __b) { /* vorn_* and vornq_*: return __a | ~__b. */ + return __a | ~__b; } +__ai int16x4_t vorn_s16(int16x4_t __a, int16x4_t __b) { + return __a | ~__b; } +__ai int32x2_t vorn_s32(int32x2_t __a, int32x2_t __b) { + return __a | ~__b; } +__ai int64x1_t vorn_s64(int64x1_t __a, int64x1_t __b) { + return __a | ~__b; } +__ai uint8x8_t vorn_u8(uint8x8_t __a, uint8x8_t __b) { + return __a | ~__b; } +__ai uint16x4_t vorn_u16(uint16x4_t __a, uint16x4_t __b) { + return __a | ~__b; } +__ai uint32x2_t vorn_u32(uint32x2_t __a, uint32x2_t __b) { + return __a | ~__b; } +__ai uint64x1_t vorn_u64(uint64x1_t __a, uint64x1_t __b) { + return __a | ~__b; } +__ai int8x16_t vornq_s8(int8x16_t __a, int8x16_t __b) { + return __a | ~__b; } +__ai int16x8_t vornq_s16(int16x8_t __a, int16x8_t __b) { + return __a | ~__b; } +__ai int32x4_t vornq_s32(int32x4_t __a, int32x4_t __b) { + return __a | ~__b; } +__ai int64x2_t vornq_s64(int64x2_t __a, int64x2_t __b) { + return __a | ~__b; } +__ai uint8x16_t vornq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a | ~__b; } +__ai uint16x8_t vornq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a | ~__b; } +__ai uint32x4_t vornq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a | ~__b; } +__ai uint64x2_t vornq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a | ~__b; } + +__ai int8x8_t vorr_s8(int8x8_t __a, int8x8_t __b) { /* vorr_* and vorrq_*: return __a | __b. */ + return __a | __b; } +__ai int16x4_t vorr_s16(int16x4_t __a, int16x4_t __b) { + return __a | __b; } +__ai int32x2_t vorr_s32(int32x2_t __a, int32x2_t __b) { + return __a | __b; } +__ai int64x1_t vorr_s64(int64x1_t __a, int64x1_t __b) { + return __a | __b; } +__ai uint8x8_t vorr_u8(uint8x8_t __a, uint8x8_t __b) { + return __a | __b; } +__ai uint16x4_t vorr_u16(uint16x4_t __a, uint16x4_t __b) { + return __a | __b; } +__ai uint32x2_t vorr_u32(uint32x2_t __a, uint32x2_t __b) { + return __a | __b; } +__ai uint64x1_t vorr_u64(uint64x1_t __a, uint64x1_t __b) { + 
return __a | __b; } +__ai int8x16_t vorrq_s8(int8x16_t __a, int8x16_t __b) { + return __a | __b; } +__ai int16x8_t vorrq_s16(int16x8_t __a, int16x8_t __b) { + return __a | __b; } +__ai int32x4_t vorrq_s32(int32x4_t __a, int32x4_t __b) { + return __a | __b; } +__ai int64x2_t vorrq_s64(int64x2_t __a, int64x2_t __b) { + return __a | __b; } +__ai uint8x16_t vorrq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a | __b; } +__ai uint16x8_t vorrq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a | __b; } +__ai uint32x4_t vorrq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a | __b; } +__ai uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a | __b; } + +__ai int16x4_t vpadal_s8(int16x4_t __a, int8x8_t __b) { /* vpadal_* and vpadalq_*: forward to __builtin_neon_vpadal_v and __builtin_neon_vpadalq_v. Operands are cast to int8x8_t (int8x16_t in the q forms) except where __b already has that type, as in the s8 variants. The arithmetic is defined by the builtin and is not visible in this header. */ + return (int16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, __b, 1); } +__ai int32x2_t vpadal_s16(int32x2_t __a, int16x4_t __b) { + return (int32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vpadal_s32(int64x1_t __a, int32x2_t __b) { + return (int64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint16x4_t vpadal_u8(uint16x4_t __a, uint8x8_t __b) { + return (uint16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vpadal_u16(uint32x2_t __a, uint16x4_t __b) { + return (uint32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vpadal_u32(uint64x1_t __a, uint32x2_t __b) { + return (uint64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int16x8_t vpadalq_s8(int16x8_t __a, int8x16_t __b) { + return (int16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, __b, 33); } +__ai int32x4_t vpadalq_s16(int32x4_t __a, int16x8_t __b) { + return (int32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vpadalq_s32(int64x2_t __a, int32x4_t __b) { + return (int64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint16x8_t vpadalq_u8(uint16x8_t __a, uint8x16_t __b) { + return 
(uint16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vpadalq_u16(uint32x4_t __a, uint16x8_t __b) { + return (uint32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vpadalq_u32(uint64x2_t __a, uint32x4_t __b) { + return (uint64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int8x8_t vpadd_s8(int8x8_t __a, int8x8_t __b) { /* vpadd_*: forward to __builtin_neon_vpadd_v; the trailing constant is 0, 1, 2 for s8, s16, s32; 16, 17, 18 for u8, u16, u32; and 7 for f32. */ + return (int8x8_t)__builtin_neon_vpadd_v(__a, __b, 0); } +__ai int16x4_t vpadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vpadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vpadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vpadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vpadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vpadd_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 7); } + +__ai int16x4_t vpaddl_s8(int8x8_t __a) { /* vpaddl_* and vpaddlq_*: single-operand wrappers around __builtin_neon_vpaddl_v and __builtin_neon_vpaddlq_v. The declared return type differs from the operand type (for example int8x8_t in, int16x4_t out). */ + return (int16x4_t)__builtin_neon_vpaddl_v(__a, 1); } +__ai int32x2_t vpaddl_s16(int16x4_t __a) { + return (int32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 2); } +__ai int64x1_t vpaddl_s32(int32x2_t __a) { + return (int64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 3); } +__ai uint16x4_t vpaddl_u8(uint8x8_t __a) { + return (uint16x4_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 17); } +__ai uint32x2_t vpaddl_u16(uint16x4_t __a) { + return (uint32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 18); } +__ai uint64x1_t vpaddl_u32(uint32x2_t __a) { + return (uint64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 19); } +__ai int16x8_t 
vpaddlq_s8(int8x16_t __a) { + return (int16x8_t)__builtin_neon_vpaddlq_v(__a, 33); } +__ai int32x4_t vpaddlq_s16(int16x8_t __a) { + return (int32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 34); } +__ai int64x2_t vpaddlq_s32(int32x4_t __a) { + return (int64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 35); } +__ai uint16x8_t vpaddlq_u8(uint8x16_t __a) { + return (uint16x8_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 49); } +__ai uint32x4_t vpaddlq_u16(uint16x8_t __a) { + return (uint32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 50); } +__ai uint64x2_t vpaddlq_u32(uint32x4_t __a) { + return (uint64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 51); } + +__ai int8x8_t vpmax_s8(int8x8_t __a, int8x8_t __b) { /* vpmax_*: two-operand wrappers around __builtin_neon_vpmax_v; operands are cast to int8x8_t unless they already have that type, and the result is cast to the declared return type. */ + return (int8x8_t)__builtin_neon_vpmax_v(__a, __b, 0); } +__ai int16x4_t vpmax_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vpmax_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vpmax_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vpmax_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vpmax_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vpmax_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 7); } + +__ai int8x8_t vpmin_s8(int8x8_t __a, int8x8_t __b) { /* vpmin_* (signed variants): two-operand wrappers around __builtin_neon_vpmin_v with trailing constants 0, 1, 2 for s8, s16, s32. */ + return (int8x8_t)__builtin_neon_vpmin_v(__a, __b, 0); } +__ai int16x4_t vpmin_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vpmin_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } 
+__ai uint8x8_t vpmin_u8(uint8x8_t __a, uint8x8_t __b) { /* vpmin_* (unsigned and float variants): both operands are reinterpreted as int8x8_t, passed to __builtin_neon_vpmin_v, and the result is cast to the declared return type. The trailing constant differs per variant (16, 17, 18 for u8, u16, u32; 7 for f32); presumably a type code consumed by the builtin -- TODO confirm against the Clang NEON builtin definitions. */ + return (uint8x8_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vpmin_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vpmin_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai float32x2_t vpmin_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 7); } + +__ai int8x8_t vqabs_s8(int8x8_t __a) { /* vqabs_* and vqabsq_*: single-operand wrappers around __builtin_neon_vqabs_v and __builtin_neon_vqabsq_v. The s8 variants pass __a uncast because it already has the type the other variants cast to; the q forms use constants 32, 33, 34 where the non-q forms use 0, 1, 2. NOTE(review): the q in the name suggests saturating behaviour, but that is implemented inside the builtin and cannot be confirmed from this header. */ + return (int8x8_t)__builtin_neon_vqabs_v(__a, 0); } +__ai int16x4_t vqabs_s16(int16x4_t __a) { + return (int16x4_t)__builtin_neon_vqabs_v((int8x8_t)__a, 1); } +__ai int32x2_t vqabs_s32(int32x2_t __a) { + return (int32x2_t)__builtin_neon_vqabs_v((int8x8_t)__a, 2); } +__ai int8x16_t vqabsq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_neon_vqabsq_v(__a, 32); } +__ai int16x8_t vqabsq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 33); } +__ai int32x4_t vqabsq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 34); } + +__ai int8x8_t vqadd_s8(int8x8_t __a, int8x8_t __b) { /* vqadd_*: two-operand wrappers around __builtin_neon_vqadd_v using the same cast-call-cast pattern; the trailing constants here are 0, 1, 2, 3 for s8, s16, s32, s64 and 16, 17 for u8, u16. */ + return (int8x8_t)__builtin_neon_vqadd_v(__a, __b, 0); } +__ai int16x4_t vqadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vqadd_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vqadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vqadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } 
+__ai uint32x2_t vqadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vqadd_u64(uint64x1_t __a, uint64x1_t __b) { + return (uint64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int8x16_t vqaddq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vqaddq_v(__a, __b, 32); } +__ai int16x8_t vqaddq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqaddq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vqaddq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vqaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vqaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vqaddq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int32x4_t vqdmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); } +__ai int64x2_t vqdmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); } + +#define vqdmlal_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlal_lane_s32(a, 
b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vqdmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { + return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); } +__ai int64x2_t vqdmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { + return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); } + +__ai int32x4_t vqdmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { + return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); } +__ai int64x2_t vqdmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { + return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); } + +#define vqdmlsl_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ + vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlsl_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ + vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vqdmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { + return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); } +__ai int64x2_t vqdmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { + return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); } + +__ai int16x4_t vqdmulh_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqdmulh_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, 
(int8x8_t)__b, 2); } +__ai int16x8_t vqdmulhq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqdmulhq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); } + +#define vqdmulh_lane_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmulh_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vqdmulhq_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) +#define vqdmulhq_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) + +__ai int16x4_t vqdmulh_n_s16(int16x4_t __a, int16_t __b) { + return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); } +__ai int32x2_t vqdmulh_n_s32(int32x2_t __a, int32_t __b) { + return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); } +__ai int16x8_t vqdmulhq_n_s16(int16x8_t __a, int16_t __b) { + return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); } +__ai int32x4_t vqdmulhq_n_s32(int32x4_t __a, int32_t __b) { + return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); } + +__ai int32x4_t vqdmull_s16(int16x4_t __a, int16x4_t __b) { + return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 34); } +__ai int64x2_t vqdmull_s32(int32x2_t __a, int32x2_t __b) { + return 
(int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 35); } + +#define vqdmull_lane_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmull_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + +__ai int32x4_t vqdmull_n_s16(int16x4_t __a, int16_t __b) { + return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); } +__ai int64x2_t vqdmull_n_s32(int32x2_t __a, int32_t __b) { + return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); } + +__ai int8x8_t vqmovn_s16(int16x8_t __a) { + return (int8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 0); } +__ai int16x4_t vqmovn_s32(int32x4_t __a) { + return (int16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 1); } +__ai int32x2_t vqmovn_s64(int64x2_t __a) { + return (int32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 2); } +__ai uint8x8_t vqmovn_u16(uint16x8_t __a) { + return (uint8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 16); } +__ai uint16x4_t vqmovn_u32(uint32x4_t __a) { + return (uint16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 17); } +__ai uint32x2_t vqmovn_u64(uint64x2_t __a) { + return (uint32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 18); } + +__ai uint8x8_t vqmovun_s16(int16x8_t __a) { + return (uint8x8_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 16); } +__ai uint16x4_t vqmovun_s32(int32x4_t __a) { + return (uint16x4_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 17); } +__ai uint32x2_t vqmovun_s64(int64x2_t __a) { + return (uint32x2_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 18); } + +__ai int8x8_t vqneg_s8(int8x8_t __a) { + return (int8x8_t)__builtin_neon_vqneg_v(__a, 0); } +__ai int16x4_t vqneg_s16(int16x4_t __a) { + return (int16x4_t)__builtin_neon_vqneg_v((int8x8_t)__a, 1); } +__ai int32x2_t 
vqneg_s32(int32x2_t __a) { + return (int32x2_t)__builtin_neon_vqneg_v((int8x8_t)__a, 2); } +__ai int8x16_t vqnegq_s8(int8x16_t __a) { + return (int8x16_t)__builtin_neon_vqnegq_v(__a, 32); } +__ai int16x8_t vqnegq_s16(int16x8_t __a) { + return (int16x8_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 33); } +__ai int32x4_t vqnegq_s32(int32x4_t __a) { + return (int32x4_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 34); } + +__ai int16x4_t vqrdmulh_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqrdmulh_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int16x8_t vqrdmulhq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqrdmulhq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); } + +#define vqrdmulh_lane_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqrdmulh_lane_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vqrdmulhq_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) +#define vqrdmulhq_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) + +__ai int16x4_t vqrdmulh_n_s16(int16x4_t __a, int16_t __b) { + return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); } +__ai int32x2_t vqrdmulh_n_s32(int32x2_t __a, int32_t __b) { + return 
(int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); } +__ai int16x8_t vqrdmulhq_n_s16(int16x8_t __a, int16_t __b) { + return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); } +__ai int32x4_t vqrdmulhq_n_s32(int32x4_t __a, int32_t __b) { + return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); } + +__ai int8x8_t vqrshl_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vqrshl_v(__a, __b, 0); } +__ai int16x4_t vqrshl_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqrshl_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vqrshl_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vqrshl_u8(uint8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_neon_vqrshl_v((int8x8_t)__a, __b, 16); } +__ai uint16x4_t vqrshl_u16(uint16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vqrshl_u32(uint32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vqrshl_u64(uint64x1_t __a, int64x1_t __b) { + return (uint64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int8x16_t vqrshlq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vqrshlq_v(__a, __b, 32); } +__ai int16x8_t vqrshlq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqrshlq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t 
vqrshlq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vqrshlq_u8(uint8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, __b, 48); } +__ai uint16x8_t vqrshlq_u16(uint16x8_t __a, int16x8_t __b) { + return (uint16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqrshlq_u32(uint32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vqrshlq_u64(uint64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +#define vqrshrn_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 0); }) +#define vqrshrn_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 1); }) +#define vqrshrn_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 2); }) +#define vqrshrn_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 16); }) +#define vqrshrn_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 17); }) +#define vqrshrn_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 18); }) + +#define vqrshrun_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 16); }) +#define vqrshrun_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 17); }) +#define vqrshrun_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, 
__b, 18); }) + +__ai int8x8_t vqshl_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vqshl_v(__a, __b, 0); } +__ai int16x4_t vqshl_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqshl_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vqshl_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vqshl_u8(uint8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_neon_vqshl_v((int8x8_t)__a, __b, 16); } +__ai uint16x4_t vqshl_u16(uint16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vqshl_u32(uint32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vqshl_u64(uint64x1_t __a, int64x1_t __b) { + return (uint64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int8x16_t vqshlq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vqshlq_v(__a, __b, 32); } +__ai int16x8_t vqshlq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqshlq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vqshlq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vqshlq_u8(uint8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_neon_vqshlq_v((int8x16_t)__a, __b, 48); } +__ai uint16x8_t vqshlq_u16(uint16x8_t __a, int16x8_t __b) { + return (uint16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqshlq_u32(uint32x4_t __a, int32x4_t __b) { + return 
(uint32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vqshlq_u64(uint64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +#define vqshlu_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vqshlu_n_v(__a, __b, 16); }) +#define vqshlu_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 17); }) +#define vqshlu_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 18); }) +#define vqshlu_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (uint64x1_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 19); }) +#define vqshluq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (uint8x16_t)__builtin_neon_vqshluq_n_v(__a, __b, 48); }) +#define vqshluq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (uint16x8_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 49); }) +#define vqshluq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (uint32x4_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 50); }) +#define vqshluq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (uint64x2_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 51); }) + +#define vqshl_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8x8_t)__builtin_neon_vqshl_n_v(__a, __b, 0); }) +#define vqshl_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 1); }) +#define vqshl_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 2); }) +#define vqshl_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 3); }) +#define vqshl_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + 
(uint8x8_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 16); }) +#define vqshl_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 17); }) +#define vqshl_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 18); }) +#define vqshl_n_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 19); }) +#define vqshlq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8x16_t)__builtin_neon_vqshlq_n_v(__a, __b, 32); }) +#define vqshlq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 33); }) +#define vqshlq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 34); }) +#define vqshlq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 35); }) +#define vqshlq_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8x16_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 48); }) +#define vqshlq_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 49); }) +#define vqshlq_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 50); }) +#define vqshlq_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 51); }) + +#define vqshrn_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 0); }) +#define vqshrn_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 1); }) +#define vqshrn_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + 
(int32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 2); }) +#define vqshrn_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 16); }) +#define vqshrn_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 17); }) +#define vqshrn_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 18); }) + +#define vqshrun_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 16); }) +#define vqshrun_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 17); }) +#define vqshrun_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 18); }) + +__ai int8x8_t vqsub_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vqsub_v(__a, __b, 0); } +__ai int16x4_t vqsub_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vqsub_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vqsub_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vqsub_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vqsub_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vqsub_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vqsub_u64(uint64x1_t __a, uint64x1_t __b) { + return (uint64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, 
(int8x8_t)__b, 19); } +__ai int8x16_t vqsubq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vqsubq_v(__a, __b, 32); } +__ai int16x8_t vqsubq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vqsubq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vqsubq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vqsubq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vqsubq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vqsubq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vqsubq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai int8x8_t vraddhn_s16(int16x8_t __a, int16x8_t __b) { + return (int8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } +__ai int16x4_t vraddhn_s32(int32x4_t __a, int32x4_t __b) { + return (int16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } +__ai int32x2_t vraddhn_s64(int64x2_t __a, int64x2_t __b) { + return (int32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } +__ai uint8x8_t vraddhn_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } +__ai uint16x4_t vraddhn_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } +__ai uint32x2_t vraddhn_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, 
(int8x16_t)__b, 18); } + +__ai float32x2_t vrecpe_f32(float32x2_t __a) { + return (float32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 7); } +__ai uint32x2_t vrecpe_u32(uint32x2_t __a) { + return (uint32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 18); } +__ai float32x4_t vrecpeq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 39); } +__ai uint32x4_t vrecpeq_u32(uint32x4_t __a) { + return (uint32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 50); } + +__ai float32x2_t vrecps_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vrecpsq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 39); } + +__ai int8x8_t vreinterpret_s8_s16(int16x4_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_s32(int32x2_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_s64(int64x1_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_f16(float16x4_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_f32(float32x2_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __a) { + return (int8x8_t)__a; } +__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __a) { + return (int8x8_t)__a; } +__ai int16x4_t vreinterpret_s16_s8(int8x8_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_s32(int32x2_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_s64(int64x1_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t 
vreinterpret_s16_u16(uint16x4_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_f16(float16x4_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_f32(float32x2_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __a) { + return (int16x4_t)__a; } +__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __a) { + return (int16x4_t)__a; } +__ai int32x2_t vreinterpret_s32_s8(int8x8_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_s16(int16x4_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_s64(int64x1_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_f16(float16x4_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_f32(float32x2_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __a) { + return (int32x2_t)__a; } +__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __a) { + return (int32x2_t)__a; } +__ai int64x1_t vreinterpret_s64_s8(int8x8_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_s16(int16x4_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_s32(int32x2_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __a) { + 
return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_f16(float16x4_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_f32(float32x2_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __a) { + return (int64x1_t)__a; } +__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __a) { + return (int64x1_t)__a; } +__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __a) { + return (uint8x8_t)__a; } +__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __a) { + return (uint8x8_t)__a; } +__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __a) { + return (uint16x4_t)__a; } +__ai 
uint16x4_t vreinterpret_u16_p8(poly8x8_t __a) { + return (uint16x4_t)__a; } +__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __a) { + return (uint16x4_t)__a; } +__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __a) { + return (uint32x2_t)__a; } +__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __a) { + return (uint32x2_t)__a; } +__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __a) { + return (uint64x1_t)__a; } +__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __a) { + return (uint64x1_t)__a; } +__ai 
float16x4_t vreinterpret_f16_s8(int8x8_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_s16(int16x4_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_s32(int32x2_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_s64(int64x1_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_f32(float32x2_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __a) { + return (float16x4_t)__a; } +__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __a) { + return (float16x4_t)__a; } +__ai float32x2_t vreinterpret_f32_s8(int8x8_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_s16(int16x4_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_s32(int32x2_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_s64(int64x1_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_f16(float16x4_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __a) { + return (float32x2_t)__a; } +__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __a) { + return (float32x2_t)__a; } +__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __a) { 
+ return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __a) { + return (poly8x8_t)__a; } +__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __a) { + return (poly8x8_t)__a; } +__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __a) { + return (poly16x4_t)__a; } +__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __a) { + return (poly16x4_t)__a; } +__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __a) { + return (int8x16_t)__a; } +__ai 
int8x16_t vreinterpretq_s8_u16(uint16x8_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __a) { + return (int8x16_t)__a; } +__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __a) { + return (int8x16_t)__a; } +__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __a) { + return (int16x8_t)__a; } +__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __a) { + return (int16x8_t)__a; } +__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t 
vreinterpretq_s32_u64(uint64x2_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __a) { + return (int32x4_t)__a; } +__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __a) { + return (int32x4_t)__a; } +__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __a) { + return (int64x2_t)__a; } +__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __a) { + return (int64x2_t)__a; } +__ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t 
vreinterpretq_u8_f32(float32x4_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __a) { + return (uint8x16_t)__a; } +__ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __a) { + return (uint8x16_t)__a; } +__ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __a) { + return (uint16x8_t)__a; } +__ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __a) { + return (uint16x8_t)__a; } +__ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __a) { + return (uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __a) { + return 
(uint32x4_t)__a; } +__ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __a) { + return (uint32x4_t)__a; } +__ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __a) { + return (uint64x2_t)__a; } +__ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __a) { + return (uint64x2_t)__a; } +__ai float16x8_t vreinterpretq_f16_s8(int8x16_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_s16(int16x8_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_s32(int32x4_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_s64(int64x2_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_f32(float32x4_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __a) { + return (float16x8_t)__a; } +__ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __a) { + return (float16x8_t)__a; } +__ai 
float32x4_t vreinterpretq_f32_s8(int8x16_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_s16(int16x8_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_s32(int32x4_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_s64(int64x2_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_f16(float16x8_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __a) { + return (float32x4_t)__a; } +__ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __a) { + return (float32x4_t)__a; } +__ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __a) { + return (poly8x16_t)__a; } +__ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __a) { + return (poly8x16_t)__a; } +__ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_s16(int16x8_t 
__a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __a) { + return (poly16x8_t)__a; } +__ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __a) { + return (poly16x8_t)__a; } + +__ai int8x8_t vrev16_s8(int8x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai uint8x8_t vrev16_u8(uint8x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai poly8x8_t vrev16_p8(poly8x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai int8x16_t vrev16q_s8(int8x16_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } +__ai uint8x16_t vrev16q_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } +__ai poly8x16_t vrev16q_p8(poly8x16_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); } + +__ai int8x8_t vrev32_s8(int8x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai int16x4_t vrev32_s16(int16x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai uint8x8_t vrev32_u8(uint8x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai uint16x4_t vrev32_u16(uint16x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 
2); } +__ai poly8x8_t vrev32_p8(poly8x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai poly16x4_t vrev32_p16(poly16x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai int8x16_t vrev32q_s8(int8x16_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } +__ai int16x8_t vrev32q_s16(int16x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai uint8x16_t vrev32q_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } +__ai uint16x8_t vrev32q_u16(uint16x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } +__ai poly8x16_t vrev32q_p8(poly8x16_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); } +__ai poly16x8_t vrev32q_p16(poly16x8_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); } + +__ai int8x8_t vrev64_s8(int8x8_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } +__ai int16x4_t vrev64_s16(int16x4_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } +__ai int32x2_t vrev64_s32(int32x2_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0); } +__ai uint8x8_t vrev64_u8(uint8x8_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } +__ai uint16x4_t vrev64_u16(uint16x4_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } +__ai uint32x2_t vrev64_u32(uint32x2_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0); } +__ai poly8x8_t vrev64_p8(poly8x8_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); } +__ai poly16x4_t vrev64_p16(poly16x4_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); } +__ai float32x2_t vrev64_f32(float32x2_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0); } +__ai int8x16_t vrev64q_s8(int8x16_t __a) { + 
return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } +__ai int16x8_t vrev64q_s16(int16x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai int32x4_t vrev64q_s32(int32x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai uint8x16_t vrev64q_u8(uint8x16_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } +__ai uint16x8_t vrev64q_u16(uint16x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai uint32x4_t vrev64q_u32(uint32x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } +__ai poly8x16_t vrev64q_p8(poly8x16_t __a) { + return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } +__ai poly16x8_t vrev64q_p16(poly16x8_t __a) { + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); } +__ai float32x4_t vrev64q_f32(float32x4_t __a) { + return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } + +__ai int8x8_t vrhadd_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vrhadd_v(__a, __b, 0); } +__ai int16x4_t vrhadd_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vrhadd_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai uint8x8_t vrhadd_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vrhadd_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vrhadd_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai int8x16_t vrhaddq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vrhaddq_v(__a, __b, 32); } 
+__ai int16x8_t vrhaddq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vrhaddq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vrhaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vrhaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vrhaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai int8x8_t vrshl_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vrshl_v(__a, __b, 0); } +__ai int16x4_t vrshl_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vrshl_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vrshl_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vrshl_u8(uint8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_neon_vrshl_v((int8x8_t)__a, __b, 16); } +__ai uint16x4_t vrshl_u16(uint16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vrshl_u32(uint32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vrshl_u64(uint64x1_t __a, int64x1_t __b) { + return (uint64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int8x16_t vrshlq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vrshlq_v(__a, __b, 32); } +__ai int16x8_t vrshlq_s16(int16x8_t __a, int16x8_t __b) { + return 
(int16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vrshlq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vrshlq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vrshlq_u8(uint8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_neon_vrshlq_v((int8x16_t)__a, __b, 48); } +__ai uint16x8_t vrshlq_u16(uint16x8_t __a, int16x8_t __b) { + return (uint16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vrshlq_u32(uint32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vrshlq_u64(uint64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +#define vrshrn_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 0); }) +#define vrshrn_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 1); }) +#define vrshrn_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 2); }) +#define vrshrn_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 16); }) +#define vrshrn_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 17); }) +#define vrshrn_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 18); }) + +#define vrshr_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8x8_t)__builtin_neon_vrshr_n_v(__a, __b, 0); }) +#define vrshr_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + 
(int16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 1); }) +#define vrshr_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 2); }) +#define vrshr_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 3); }) +#define vrshr_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 16); }) +#define vrshr_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 17); }) +#define vrshr_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 18); }) +#define vrshr_n_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 19); }) +#define vrshrq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8x16_t)__builtin_neon_vrshrq_n_v(__a, __b, 32); }) +#define vrshrq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 33); }) +#define vrshrq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 34); }) +#define vrshrq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 35); }) +#define vrshrq_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8x16_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 48); }) +#define vrshrq_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 49); }) +#define vrshrq_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 50); }) +#define vrshrq_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + 
(uint64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 51); }) + +__ai float32x2_t vrsqrte_f32(float32x2_t __a) { + return (float32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 7); } +__ai uint32x2_t vrsqrte_u32(uint32x2_t __a) { + return (uint32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 18); } +__ai float32x4_t vrsqrteq_f32(float32x4_t __a) { + return (float32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 39); } +__ai uint32x4_t vrsqrteq_u32(uint32x4_t __a) { + return (uint32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 50); } + +__ai float32x2_t vrsqrts_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vrsqrtsq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 39); } + +#define vrsra_n_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_neon_vrsra_n_v(__a, __b, __c, 0); }) +#define vrsra_n_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vrsra_n_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vrsra_n_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vrsra_n_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vrsra_n_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vrsra_n_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + 
(uint32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vrsra_n_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vrsraq_n_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_neon_vrsraq_n_v(__a, __b, __c, 32); }) +#define vrsraq_n_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vrsraq_n_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vrsraq_n_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vrsraq_n_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vrsraq_n_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vrsraq_n_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vrsraq_n_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) + +__ai int8x8_t vrsubhn_s16(int16x8_t __a, int16x8_t __b) { + return (int8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } +__ai int16x4_t vrsubhn_s32(int32x4_t __a, int32x4_t __b) { + return (int16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } +__ai 
int32x2_t vrsubhn_s64(int64x2_t __a, int64x2_t __b) { + return (int32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } +__ai uint8x8_t vrsubhn_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } +__ai uint16x4_t vrsubhn_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } +__ai uint32x2_t vrsubhn_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } + +#define vset_lane_u8(a, b, __c) __extension__ ({ \ + uint8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); }) +#define vset_lane_u16(a, b, __c) __extension__ ({ \ + uint16_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); }) +#define vset_lane_u32(a, b, __c) __extension__ ({ \ + uint32_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_neon_vset_lane_i32(__a, (int32x2_t)__b, __c); }) +#define vset_lane_s8(a, b, __c) __extension__ ({ \ + int8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_neon_vset_lane_i8(__a, __b, __c); }) +#define vset_lane_s16(a, b, __c) __extension__ ({ \ + int16_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_neon_vset_lane_i16(__a, __b, __c); }) +#define vset_lane_s32(a, b, __c) __extension__ ({ \ + int32_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_neon_vset_lane_i32(__a, __b, __c); }) +#define vset_lane_p8(a, b, __c) __extension__ ({ \ + poly8_t __a = (a); poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); }) +#define vset_lane_p16(a, b, __c) __extension__ ({ \ + poly16_t __a = (a); poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); }) +#define vset_lane_f32(a, b, __c) __extension__ ({ \ + float32_t __a = (a); float32x2_t __b = (b); \ 
+ (float32x2_t)__builtin_neon_vset_lane_f32(__a, __b, __c); }) +#define vsetq_lane_u8(a, b, __c) __extension__ ({ \ + uint8_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); }) +#define vsetq_lane_u16(a, b, __c) __extension__ ({ \ + uint16_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); }) +#define vsetq_lane_u32(a, b, __c) __extension__ ({ \ + uint32_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_neon_vsetq_lane_i32(__a, (int32x4_t)__b, __c); }) +#define vsetq_lane_s8(a, b, __c) __extension__ ({ \ + int8_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_neon_vsetq_lane_i8(__a, __b, __c); }) +#define vsetq_lane_s16(a, b, __c) __extension__ ({ \ + int16_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_neon_vsetq_lane_i16(__a, __b, __c); }) +#define vsetq_lane_s32(a, b, __c) __extension__ ({ \ + int32_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_neon_vsetq_lane_i32(__a, __b, __c); }) +#define vsetq_lane_p8(a, b, __c) __extension__ ({ \ + poly8_t __a = (a); poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); }) +#define vsetq_lane_p16(a, b, __c) __extension__ ({ \ + poly16_t __a = (a); poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); }) +#define vsetq_lane_f32(a, b, __c) __extension__ ({ \ + float32_t __a = (a); float32x4_t __b = (b); \ + (float32x4_t)__builtin_neon_vsetq_lane_f32(__a, __b, __c); }) +#define vset_lane_s64(a, b, __c) __extension__ ({ \ + int64_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_neon_vset_lane_i64(__a, __b, __c); }) +#define vset_lane_u64(a, b, __c) __extension__ ({ \ + uint64_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_neon_vset_lane_i64(__a, (int64x1_t)__b, __c); }) +#define vsetq_lane_s64(a, b, __c) __extension__ ({ \ + int64_t __a = (a); int64x2_t __b = (b); \ + 
(int64x2_t)__builtin_neon_vsetq_lane_i64(__a, __b, __c); }) +#define vsetq_lane_u64(a, b, __c) __extension__ ({ \ + uint64_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_neon_vsetq_lane_i64(__a, (int64x2_t)__b, __c); }) + +__ai int8x8_t vshl_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vshl_v(__a, __b, 0); } +__ai int16x4_t vshl_s16(int16x4_t __a, int16x4_t __b) { + return (int16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } +__ai int32x2_t vshl_s32(int32x2_t __a, int32x2_t __b) { + return (int32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } +__ai int64x1_t vshl_s64(int64x1_t __a, int64x1_t __b) { + return (int64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } +__ai uint8x8_t vshl_u8(uint8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_neon_vshl_v((int8x8_t)__a, __b, 16); } +__ai uint16x4_t vshl_u16(uint16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vshl_u32(uint32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint64x1_t vshl_u64(uint64x1_t __a, int64x1_t __b) { + return (uint64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 19); } +__ai int8x16_t vshlq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vshlq_v(__a, __b, 32); } +__ai int16x8_t vshlq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vshlq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai int64x2_t vshlq_s64(int64x2_t __a, int64x2_t __b) { + return (int64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); } +__ai uint8x16_t vshlq_u8(uint8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_neon_vshlq_v((int8x16_t)__a, __b, 48); } +__ai uint16x8_t vshlq_u16(uint16x8_t 
__a, int16x8_t __b) { + return (uint16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vshlq_u32(uint32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint64x2_t vshlq_u64(uint64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +#define vshll_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int16x8_t)__builtin_neon_vshll_n_v(__a, __b, 33); }) +#define vshll_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 34); }) +#define vshll_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 35); }) +#define vshll_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint16x8_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 49); }) +#define vshll_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 50); }) +#define vshll_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 51); }) + +#define vshl_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8x8_t)__builtin_neon_vshl_n_v(__a, __b, 0); }) +#define vshl_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 1); }) +#define vshl_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 2); }) +#define vshl_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 3); }) +#define vshl_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 16); }) +#define vshl_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + 
(uint16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 17); }) +#define vshl_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 18); }) +#define vshl_n_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 19); }) +#define vshlq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8x16_t)__builtin_neon_vshlq_n_v(__a, __b, 32); }) +#define vshlq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 33); }) +#define vshlq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 34); }) +#define vshlq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 35); }) +#define vshlq_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8x16_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 48); }) +#define vshlq_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 49); }) +#define vshlq_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 50); }) +#define vshlq_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 51); }) + +#define vshrn_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 0); }) +#define vshrn_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 1); }) +#define vshrn_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 2); }) +#define vshrn_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + 
(uint8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 16); }) +#define vshrn_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 17); }) +#define vshrn_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 18); }) + +#define vshr_n_s8(a, __b) __extension__ ({ \ + int8x8_t __a = (a); \ + (int8x8_t)__builtin_neon_vshr_n_v(__a, __b, 0); }) +#define vshr_n_s16(a, __b) __extension__ ({ \ + int16x4_t __a = (a); \ + (int16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 1); }) +#define vshr_n_s32(a, __b) __extension__ ({ \ + int32x2_t __a = (a); \ + (int32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 2); }) +#define vshr_n_s64(a, __b) __extension__ ({ \ + int64x1_t __a = (a); \ + (int64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 3); }) +#define vshr_n_u8(a, __b) __extension__ ({ \ + uint8x8_t __a = (a); \ + (uint8x8_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 16); }) +#define vshr_n_u16(a, __b) __extension__ ({ \ + uint16x4_t __a = (a); \ + (uint16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 17); }) +#define vshr_n_u32(a, __b) __extension__ ({ \ + uint32x2_t __a = (a); \ + (uint32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 18); }) +#define vshr_n_u64(a, __b) __extension__ ({ \ + uint64x1_t __a = (a); \ + (uint64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 19); }) +#define vshrq_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + (int8x16_t)__builtin_neon_vshrq_n_v(__a, __b, 32); }) +#define vshrq_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + (int16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 33); }) +#define vshrq_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + (int32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 34); }) +#define vshrq_n_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (int64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 35); }) +#define 
vshrq_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + (uint8x16_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 48); }) +#define vshrq_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + (uint16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 49); }) +#define vshrq_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + (uint32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 50); }) +#define vshrq_n_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (uint64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 51); }) + +#define vsli_n_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_neon_vsli_n_v(__a, __b, __c, 0); }) +#define vsli_n_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vsli_n_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vsli_n_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vsli_n_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vsli_n_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vsli_n_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vsli_n_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vsli_n_p8(a, b, __c) __extension__ ({ \ + 
poly8x8_t __a = (a); poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) +#define vsli_n_p16(a, b, __c) __extension__ ({ \ + poly16x4_t __a = (a); poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) +#define vsliq_n_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_neon_vsliq_n_v(__a, __b, __c, 32); }) +#define vsliq_n_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vsliq_n_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vsliq_n_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vsliq_n_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vsliq_n_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vsliq_n_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vsliq_n_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) +#define vsliq_n_p8(a, b, __c) __extension__ ({ \ + poly8x16_t __a = (a); poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) +#define vsliq_n_p16(a, b, __c) __extension__ ({ \ + poly16x8_t __a = (a); poly16x8_t 
__b = (b); \ + (poly16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) + +#define vsra_n_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_neon_vsra_n_v(__a, __b, __c, 0); }) +#define vsra_n_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vsra_n_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vsra_n_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vsra_n_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vsra_n_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vsra_n_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vsra_n_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vsraq_n_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_neon_vsraq_n_v(__a, __b, __c, 32); }) +#define vsraq_n_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vsraq_n_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 
34); }) +#define vsraq_n_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vsraq_n_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vsraq_n_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vsraq_n_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vsraq_n_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = (b); \ + (uint64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) + +#define vsri_n_s8(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int8x8_t __b = (b); \ + (int8x8_t)__builtin_neon_vsri_n_v(__a, __b, __c, 0); }) +#define vsri_n_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); \ + (int16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) +#define vsri_n_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); \ + (int32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) +#define vsri_n_s64(a, b, __c) __extension__ ({ \ + int64x1_t __a = (a); int64x1_t __b = (b); \ + (int64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) +#define vsri_n_u8(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint8x8_t __b = (b); \ + (uint8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); }) +#define vsri_n_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); \ + (uint16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); }) +#define vsri_n_u32(a, b, __c) __extension__ ({ 
\ + uint32x2_t __a = (a); uint32x2_t __b = (b); \ + (uint32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); }) +#define vsri_n_u64(a, b, __c) __extension__ ({ \ + uint64x1_t __a = (a); uint64x1_t __b = (b); \ + (uint64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); }) +#define vsri_n_p8(a, b, __c) __extension__ ({ \ + poly8x8_t __a = (a); poly8x8_t __b = (b); \ + (poly8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) +#define vsri_n_p16(a, b, __c) __extension__ ({ \ + poly16x4_t __a = (a); poly16x4_t __b = (b); \ + (poly16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) +#define vsriq_n_s8(a, b, __c) __extension__ ({ \ + int8x16_t __a = (a); int8x16_t __b = (b); \ + (int8x16_t)__builtin_neon_vsriq_n_v(__a, __b, __c, 32); }) +#define vsriq_n_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + (int16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); }) +#define vsriq_n_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + (int32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); }) +#define vsriq_n_s64(a, b, __c) __extension__ ({ \ + int64x2_t __a = (a); int64x2_t __b = (b); \ + (int64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); }) +#define vsriq_n_u8(a, b, __c) __extension__ ({ \ + uint8x16_t __a = (a); uint8x16_t __b = (b); \ + (uint8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); }) +#define vsriq_n_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + (uint16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); }) +#define vsriq_n_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + (uint32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); }) +#define vsriq_n_u64(a, b, __c) __extension__ ({ \ + uint64x2_t __a = (a); uint64x2_t __b = 
(b); \ + (uint64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); }) +#define vsriq_n_p8(a, b, __c) __extension__ ({ \ + poly8x16_t __a = (a); poly8x16_t __b = (b); \ + (poly8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); }) +#define vsriq_n_p16(a, b, __c) __extension__ ({ \ + poly16x8_t __a = (a); poly16x8_t __b = (b); \ + (poly16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); }) + +#define vst1q_u8(__a, b) __extension__ ({ \ + uint8x16_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 48); }) +#define vst1q_u16(__a, b) __extension__ ({ \ + uint16x8_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 49); }) +#define vst1q_u32(__a, b) __extension__ ({ \ + uint32x4_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 50); }) +#define vst1q_u64(__a, b) __extension__ ({ \ + uint64x2_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 51); }) +#define vst1q_s8(__a, b) __extension__ ({ \ + int8x16_t __b = (b); \ + __builtin_neon_vst1q_v(__a, __b, 32); }) +#define vst1q_s16(__a, b) __extension__ ({ \ + int16x8_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 33); }) +#define vst1q_s32(__a, b) __extension__ ({ \ + int32x4_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 34); }) +#define vst1q_s64(__a, b) __extension__ ({ \ + int64x2_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 35); }) +#define vst1q_f16(__a, b) __extension__ ({ \ + float16x8_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 38); }) +#define vst1q_f32(__a, b) __extension__ ({ \ + float32x4_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 39); }) +#define vst1q_p8(__a, b) __extension__ ({ \ + poly8x16_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 36); }) +#define vst1q_p16(__a, b) __extension__ ({ \ + poly16x8_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 37); }) +#define vst1_u8(__a, b) __extension__ ({ \ + 
uint8x8_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 16); }) +#define vst1_u16(__a, b) __extension__ ({ \ + uint16x4_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 17); }) +#define vst1_u32(__a, b) __extension__ ({ \ + uint32x2_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 18); }) +#define vst1_u64(__a, b) __extension__ ({ \ + uint64x1_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 19); }) +#define vst1_s8(__a, b) __extension__ ({ \ + int8x8_t __b = (b); \ + __builtin_neon_vst1_v(__a, __b, 0); }) +#define vst1_s16(__a, b) __extension__ ({ \ + int16x4_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 1); }) +#define vst1_s32(__a, b) __extension__ ({ \ + int32x2_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 2); }) +#define vst1_s64(__a, b) __extension__ ({ \ + int64x1_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 3); }) +#define vst1_f16(__a, b) __extension__ ({ \ + float16x4_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 6); }) +#define vst1_f32(__a, b) __extension__ ({ \ + float32x2_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 7); }) +#define vst1_p8(__a, b) __extension__ ({ \ + poly8x8_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 4); }) +#define vst1_p16(__a, b) __extension__ ({ \ + poly16x4_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 5); }) + +#define vst1q_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x16_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 48); }) +#define vst1q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 49); }) +#define vst1q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 50); }) +#define vst1q_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x2_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 51); }) +#define 
vst1q_lane_s8(__a, b, __c) __extension__ ({ \ + int8x16_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, __b, __c, 32); }) +#define vst1q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 33); }) +#define vst1q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 34); }) +#define vst1q_lane_s64(__a, b, __c) __extension__ ({ \ + int64x2_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 35); }) +#define vst1q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 38); }) +#define vst1q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 39); }) +#define vst1q_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x16_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 36); }) +#define vst1q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8_t __b = (b); \ + __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 37); }) +#define vst1_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 16); }) +#define vst1_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 17); }) +#define vst1_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 18); }) +#define vst1_lane_u64(__a, b, __c) __extension__ ({ \ + uint64x1_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 19); }) +#define vst1_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, __b, __c, 0); }) +#define vst1_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 1); }) +#define 
vst1_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 2); }) +#define vst1_lane_s64(__a, b, __c) __extension__ ({ \ + int64x1_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 3); }) +#define vst1_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 6); }) +#define vst1_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 7); }) +#define vst1_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 4); }) +#define vst1_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4_t __b = (b); \ + __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 5); }) + +#define vst2q_u8(__a, b) __extension__ ({ \ + uint8x16x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 48); }) +#define vst2q_u16(__a, b) __extension__ ({ \ + uint16x8x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 49); }) +#define vst2q_u32(__a, b) __extension__ ({ \ + uint32x4x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 50); }) +#define vst2q_s8(__a, b) __extension__ ({ \ + int8x16x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, __b.val[0], __b.val[1], 32); }) +#define vst2q_s16(__a, b) __extension__ ({ \ + int16x8x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 33); }) +#define vst2q_s32(__a, b) __extension__ ({ \ + int32x4x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 34); }) +#define vst2q_f16(__a, b) __extension__ ({ \ + float16x8x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 38); }) +#define vst2q_f32(__a, b) __extension__ ({ \ + float32x4x2_t __b = (b); \ + 
__builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 39); }) +#define vst2q_p8(__a, b) __extension__ ({ \ + poly8x16x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 36); }) +#define vst2q_p16(__a, b) __extension__ ({ \ + poly16x8x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 37); }) +#define vst2_u8(__a, b) __extension__ ({ \ + uint8x8x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 16); }) +#define vst2_u16(__a, b) __extension__ ({ \ + uint16x4x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 17); }) +#define vst2_u32(__a, b) __extension__ ({ \ + uint32x2x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 18); }) +#define vst2_u64(__a, b) __extension__ ({ \ + uint64x1x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 19); }) +#define vst2_s8(__a, b) __extension__ ({ \ + int8x8x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, __b.val[0], __b.val[1], 0); }) +#define vst2_s16(__a, b) __extension__ ({ \ + int16x4x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 1); }) +#define vst2_s32(__a, b) __extension__ ({ \ + int32x2x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 2); }) +#define vst2_s64(__a, b) __extension__ ({ \ + int64x1x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 3); }) +#define vst2_f16(__a, b) __extension__ ({ \ + float16x4x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 6); }) +#define vst2_f32(__a, b) __extension__ ({ \ + float32x2x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 7); }) +#define vst2_p8(__a, b) __extension__ ({ \ + poly8x8x2_t __b = (b); \ + 
__builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 4); }) +#define vst2_p16(__a, b) __extension__ ({ \ + poly16x4x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 5); }) + +#define vst2q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x2_t __b = (b); \ + __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); }) +#define vst2q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x2_t __b = (b); \ + __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); }) +#define vst2q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x2_t __b = (b); \ + __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); }) +#define vst2q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x2_t __b = (b); \ + __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); }) +#define vst2q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x2_t __b = (b); \ + __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); }) +#define vst2q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x2_t __b = (b); \ + __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); }) +#define vst2q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x2_t __b = (b); \ + __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); }) +#define vst2_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); }) +#define vst2_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); }) +#define vst2_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); }) 
+#define vst2_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, __b.val[0], __b.val[1], __c, 0); }) +#define vst2_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); }) +#define vst2_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); }) +#define vst2_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); }) +#define vst2_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); }) +#define vst2_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); }) +#define vst2_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x2_t __b = (b); \ + __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); }) + +#define vst3q_u8(__a, b) __extension__ ({ \ + uint8x16x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 48); }) +#define vst3q_u16(__a, b) __extension__ ({ \ + uint16x8x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 49); }) +#define vst3q_u32(__a, b) __extension__ ({ \ + uint32x4x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 50); }) +#define vst3q_s8(__a, b) __extension__ ({ \ + int8x16x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, __b.val[0], __b.val[1], __b.val[2], 32); }) +#define vst3q_s16(__a, b) __extension__ ({ \ + int16x8x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, 
(int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 33); }) +#define vst3q_s32(__a, b) __extension__ ({ \ + int32x4x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 34); }) +#define vst3q_f16(__a, b) __extension__ ({ \ + float16x8x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 38); }) +#define vst3q_f32(__a, b) __extension__ ({ \ + float32x4x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 39); }) +#define vst3q_p8(__a, b) __extension__ ({ \ + poly8x16x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 36); }) +#define vst3q_p16(__a, b) __extension__ ({ \ + poly16x8x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 37); }) +#define vst3_u8(__a, b) __extension__ ({ \ + uint8x8x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 16); }) +#define vst3_u16(__a, b) __extension__ ({ \ + uint16x4x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 17); }) +#define vst3_u32(__a, b) __extension__ ({ \ + uint32x2x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 18); }) +#define vst3_u64(__a, b) __extension__ ({ \ + uint64x1x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 19); }) +#define vst3_s8(__a, b) __extension__ ({ \ + int8x8x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, __b.val[0], __b.val[1], __b.val[2], 0); }) +#define vst3_s16(__a, b) __extension__ ({ \ + int16x4x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 1); }) 
+#define vst3_s32(__a, b) __extension__ ({ \ + int32x2x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 2); }) +#define vst3_s64(__a, b) __extension__ ({ \ + int64x1x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 3); }) +#define vst3_f16(__a, b) __extension__ ({ \ + float16x4x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 6); }) +#define vst3_f32(__a, b) __extension__ ({ \ + float32x2x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 7); }) +#define vst3_p8(__a, b) __extension__ ({ \ + poly8x8x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 4); }) +#define vst3_p16(__a, b) __extension__ ({ \ + poly16x4x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 5); }) + +#define vst3q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x3_t __b = (b); \ + __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); }) +#define vst3q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x3_t __b = (b); \ + __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); }) +#define vst3q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x3_t __b = (b); \ + __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); }) +#define vst3q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x3_t __b = (b); \ + __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); }) +#define vst3q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x3_t __b = (b); \ + __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], 
(int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); }) +#define vst3q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x3_t __b = (b); \ + __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); }) +#define vst3q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x3_t __b = (b); \ + __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); }) +#define vst3_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); }) +#define vst3_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); }) +#define vst3_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); }) +#define vst3_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); }) +#define vst3_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); }) +#define vst3_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); }) +#define vst3_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); }) +#define vst3_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); }) +#define 
vst3_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); }) +#define vst3_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x3_t __b = (b); \ + __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); }) + +#define vst4q_u8(__a, b) __extension__ ({ \ + uint8x16x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 48); }) +#define vst4q_u16(__a, b) __extension__ ({ \ + uint16x8x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 49); }) +#define vst4q_u32(__a, b) __extension__ ({ \ + uint32x4x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 50); }) +#define vst4q_s8(__a, b) __extension__ ({ \ + int8x16x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 32); }) +#define vst4q_s16(__a, b) __extension__ ({ \ + int16x8x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 33); }) +#define vst4q_s32(__a, b) __extension__ ({ \ + int32x4x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 34); }) +#define vst4q_f16(__a, b) __extension__ ({ \ + float16x8x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 38); }) +#define vst4q_f32(__a, b) __extension__ ({ \ + float32x4x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 39); }) +#define vst4q_p8(__a, b) 
__extension__ ({ \ + poly8x16x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 36); }) +#define vst4q_p16(__a, b) __extension__ ({ \ + poly16x8x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 37); }) +#define vst4_u8(__a, b) __extension__ ({ \ + uint8x8x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 16); }) +#define vst4_u16(__a, b) __extension__ ({ \ + uint16x4x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 17); }) +#define vst4_u32(__a, b) __extension__ ({ \ + uint32x2x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 18); }) +#define vst4_u64(__a, b) __extension__ ({ \ + uint64x1x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 19); }) +#define vst4_s8(__a, b) __extension__ ({ \ + int8x8x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 0); }) +#define vst4_s16(__a, b) __extension__ ({ \ + int16x4x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 1); }) +#define vst4_s32(__a, b) __extension__ ({ \ + int32x2x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 2); }) +#define vst4_s64(__a, b) __extension__ ({ \ + int64x1x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 3); }) +#define vst4_f16(__a, b) __extension__ ({ \ + float16x4x4_t __b = (b); \ + 
__builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 6); }) +#define vst4_f32(__a, b) __extension__ ({ \ + float32x2x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 7); }) +#define vst4_p8(__a, b) __extension__ ({ \ + poly8x8x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 4); }) +#define vst4_p16(__a, b) __extension__ ({ \ + poly16x4x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 5); }) + +#define vst4q_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x8x4_t __b = (b); \ + __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); }) +#define vst4q_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x4x4_t __b = (b); \ + __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); }) +#define vst4q_lane_s16(__a, b, __c) __extension__ ({ \ + int16x8x4_t __b = (b); \ + __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); }) +#define vst4q_lane_s32(__a, b, __c) __extension__ ({ \ + int32x4x4_t __b = (b); \ + __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); }) +#define vst4q_lane_f16(__a, b, __c) __extension__ ({ \ + float16x8x4_t __b = (b); \ + __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); }) +#define vst4q_lane_f32(__a, b, __c) __extension__ ({ \ + float32x4x4_t __b = (b); \ + __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 
(int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); }) +#define vst4q_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x8x4_t __b = (b); \ + __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); }) +#define vst4_lane_u8(__a, b, __c) __extension__ ({ \ + uint8x8x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); }) +#define vst4_lane_u16(__a, b, __c) __extension__ ({ \ + uint16x4x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); }) +#define vst4_lane_u32(__a, b, __c) __extension__ ({ \ + uint32x2x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); }) +#define vst4_lane_s8(__a, b, __c) __extension__ ({ \ + int8x8x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); }) +#define vst4_lane_s16(__a, b, __c) __extension__ ({ \ + int16x4x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); }) +#define vst4_lane_s32(__a, b, __c) __extension__ ({ \ + int32x2x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); }) +#define vst4_lane_f16(__a, b, __c) __extension__ ({ \ + float16x4x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); }) +#define vst4_lane_f32(__a, b, __c) __extension__ ({ \ + float32x2x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); }) +#define 
vst4_lane_p8(__a, b, __c) __extension__ ({ \ + poly8x8x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); }) +#define vst4_lane_p16(__a, b, __c) __extension__ ({ \ + poly16x4x4_t __b = (b); \ + __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); }) + +__ai int8x8_t vsub_s8(int8x8_t __a, int8x8_t __b) { + return __a - __b; } +__ai int16x4_t vsub_s16(int16x4_t __a, int16x4_t __b) { + return __a - __b; } +__ai int32x2_t vsub_s32(int32x2_t __a, int32x2_t __b) { + return __a - __b; } +__ai int64x1_t vsub_s64(int64x1_t __a, int64x1_t __b) { + return __a - __b; } +__ai float32x2_t vsub_f32(float32x2_t __a, float32x2_t __b) { + return __a - __b; } +__ai uint8x8_t vsub_u8(uint8x8_t __a, uint8x8_t __b) { + return __a - __b; } +__ai uint16x4_t vsub_u16(uint16x4_t __a, uint16x4_t __b) { + return __a - __b; } +__ai uint32x2_t vsub_u32(uint32x2_t __a, uint32x2_t __b) { + return __a - __b; } +__ai uint64x1_t vsub_u64(uint64x1_t __a, uint64x1_t __b) { + return __a - __b; } +__ai int8x16_t vsubq_s8(int8x16_t __a, int8x16_t __b) { + return __a - __b; } +__ai int16x8_t vsubq_s16(int16x8_t __a, int16x8_t __b) { + return __a - __b; } +__ai int32x4_t vsubq_s32(int32x4_t __a, int32x4_t __b) { + return __a - __b; } +__ai int64x2_t vsubq_s64(int64x2_t __a, int64x2_t __b) { + return __a - __b; } +__ai float32x4_t vsubq_f32(float32x4_t __a, float32x4_t __b) { + return __a - __b; } +__ai uint8x16_t vsubq_u8(uint8x16_t __a, uint8x16_t __b) { + return __a - __b; } +__ai uint16x8_t vsubq_u16(uint16x8_t __a, uint16x8_t __b) { + return __a - __b; } +__ai uint32x4_t vsubq_u32(uint32x4_t __a, uint32x4_t __b) { + return __a - __b; } +__ai uint64x2_t vsubq_u64(uint64x2_t __a, uint64x2_t __b) { + return __a - __b; } + +__ai int8x8_t vsubhn_s16(int16x8_t __a, int16x8_t __b) { + return 
(int8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } +__ai int16x4_t vsubhn_s32(int32x4_t __a, int32x4_t __b) { + return (int16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } +__ai int32x2_t vsubhn_s64(int64x2_t __a, int64x2_t __b) { + return (int32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } +__ai uint8x8_t vsubhn_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); } +__ai uint16x4_t vsubhn_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); } +__ai uint32x2_t vsubhn_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); } + +__ai int16x8_t vsubl_s8(int8x8_t __a, int8x8_t __b) { + return vmovl_s8(__a) - vmovl_s8(__b); } +__ai int32x4_t vsubl_s16(int16x4_t __a, int16x4_t __b) { + return vmovl_s16(__a) - vmovl_s16(__b); } +__ai int64x2_t vsubl_s32(int32x2_t __a, int32x2_t __b) { + return vmovl_s32(__a) - vmovl_s32(__b); } +__ai uint16x8_t vsubl_u8(uint8x8_t __a, uint8x8_t __b) { + return vmovl_u8(__a) - vmovl_u8(__b); } +__ai uint32x4_t vsubl_u16(uint16x4_t __a, uint16x4_t __b) { + return vmovl_u16(__a) - vmovl_u16(__b); } +__ai uint64x2_t vsubl_u32(uint32x2_t __a, uint32x2_t __b) { + return vmovl_u32(__a) - vmovl_u32(__b); } + +__ai int16x8_t vsubw_s8(int16x8_t __a, int8x8_t __b) { + return __a - vmovl_s8(__b); } +__ai int32x4_t vsubw_s16(int32x4_t __a, int16x4_t __b) { + return __a - vmovl_s16(__b); } +__ai int64x2_t vsubw_s32(int64x2_t __a, int32x2_t __b) { + return __a - vmovl_s32(__b); } +__ai uint16x8_t vsubw_u8(uint16x8_t __a, uint8x8_t __b) { + return __a - vmovl_u8(__b); } +__ai uint32x4_t vsubw_u16(uint32x4_t __a, uint16x4_t __b) { + return __a - vmovl_u16(__b); } +__ai uint64x2_t vsubw_u32(uint64x2_t __a, uint32x2_t __b) { + return __a - vmovl_u32(__b); } + +__ai uint8x8_t vtbl1_u8(uint8x8_t __a, 
uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai int8x8_t vtbl1_s8(int8x8_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vtbl1_v(__a, __b, 0); } +__ai poly8x8_t vtbl1_p8(poly8x8_t __a, uint8x8_t __b) { + return (poly8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 4); } + +__ai uint8x8_t vtbl2_u8(uint8x8x2_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 16); } +__ai int8x8_t vtbl2_s8(int8x8x2_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vtbl2_v(__a.val[0], __a.val[1], __b, 0); } +__ai poly8x8_t vtbl2_p8(poly8x8x2_t __a, uint8x8_t __b) { + return (poly8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 4); } + +__ai uint8x8_t vtbl3_u8(uint8x8x3_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 16); } +__ai int8x8_t vtbl3_s8(int8x8x3_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vtbl3_v(__a.val[0], __a.val[1], __a.val[2], __b, 0); } +__ai poly8x8_t vtbl3_p8(poly8x8x3_t __a, uint8x8_t __b) { + return (poly8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 4); } + +__ai uint8x8_t vtbl4_u8(uint8x8x4_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 16); } +__ai int8x8_t vtbl4_s8(int8x8x4_t __a, int8x8_t __b) { + return (int8x8_t)__builtin_neon_vtbl4_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 0); } +__ai poly8x8_t vtbl4_p8(poly8x8x4_t __a, uint8x8_t __b) { + return (poly8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 4); } + +__ai uint8x8_t vtbx1_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { + 
return (uint8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 16); } +__ai int8x8_t vtbx1_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_neon_vtbx1_v(__a, __b, __c, 0); } +__ai poly8x8_t vtbx1_p8(poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) { + return (poly8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 4); } + +__ai uint8x8_t vtbx2_u8(uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) { + return (uint8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 16); } +__ai int8x8_t vtbx2_s8(int8x8_t __a, int8x8x2_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_neon_vtbx2_v(__a, __b.val[0], __b.val[1], __c, 0); } +__ai poly8x8_t vtbx2_p8(poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) { + return (poly8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 4); } + +__ai uint8x8_t vtbx3_u8(uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) { + return (uint8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 16); } +__ai int8x8_t vtbx3_s8(int8x8_t __a, int8x8x3_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_neon_vtbx3_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); } +__ai poly8x8_t vtbx3_p8(poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) { + return (poly8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 4); } + +__ai uint8x8_t vtbx4_u8(uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) { + return (uint8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 16); } +__ai int8x8_t vtbx4_s8(int8x8_t __a, int8x8x4_t __b, int8x8_t __c) { + return (int8x8_t)__builtin_neon_vtbx4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); } +__ai poly8x8_t vtbx4_p8(poly8x8_t 
__a, poly8x8x4_t __b, uint8x8_t __c) { + return (poly8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 4); } + +__ai int8x8x2_t vtrn_s8(int8x8_t __a, int8x8_t __b) { + int8x8x2_t r; __builtin_neon_vtrn_v(&r, __a, __b, 0); return r; } +__ai int16x4x2_t vtrn_s16(int16x4_t __a, int16x4_t __b) { + int16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } +__ai int32x2x2_t vtrn_s32(int32x2_t __a, int32x2_t __b) { + int32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } +__ai uint8x8x2_t vtrn_u8(uint8x8_t __a, uint8x8_t __b) { + uint8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } +__ai uint16x4x2_t vtrn_u16(uint16x4_t __a, uint16x4_t __b) { + uint16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } +__ai uint32x2x2_t vtrn_u32(uint32x2_t __a, uint32x2_t __b) { + uint32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; } +__ai float32x2x2_t vtrn_f32(float32x2_t __a, float32x2_t __b) { + float32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } +__ai poly8x8x2_t vtrn_p8(poly8x8_t __a, poly8x8_t __b) { + poly8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } +__ai poly16x4x2_t vtrn_p16(poly16x4_t __a, poly16x4_t __b) { + poly16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } +__ai int8x16x2_t vtrnq_s8(int8x16_t __a, int8x16_t __b) { + int8x16x2_t r; __builtin_neon_vtrnq_v(&r, __a, __b, 32); return r; } +__ai int16x8x2_t vtrnq_s16(int16x8_t __a, int16x8_t __b) { + int16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } +__ai int32x4x2_t vtrnq_s32(int32x4_t __a, int32x4_t __b) { + int32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } +__ai uint8x16x2_t 
vtrnq_u8(uint8x16_t __a, uint8x16_t __b) { + uint8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } +__ai uint16x8x2_t vtrnq_u16(uint16x8_t __a, uint16x8_t __b) { + uint16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } +__ai uint32x4x2_t vtrnq_u32(uint32x4_t __a, uint32x4_t __b) { + uint32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; } +__ai float32x4x2_t vtrnq_f32(float32x4_t __a, float32x4_t __b) { + float32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } +__ai poly8x16x2_t vtrnq_p8(poly8x16_t __a, poly8x16_t __b) { + poly8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } +__ai poly16x8x2_t vtrnq_p16(poly16x8_t __a, poly16x8_t __b) { + poly16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } + +__ai uint8x8_t vtst_s8(int8x8_t __a, int8x8_t __b) { + return (uint8x8_t)__builtin_neon_vtst_v(__a, __b, 16); } +__ai uint16x4_t vtst_s16(int16x4_t __a, int16x4_t __b) { + return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vtst_s32(int32x2_t __a, int32x2_t __b) { + return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint8x8_t vtst_u8(uint8x8_t __a, uint8x8_t __b) { + return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vtst_u16(uint16x4_t __a, uint16x4_t __b) { + return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); } +__ai uint32x2_t vtst_u32(uint32x2_t __a, uint32x2_t __b) { + return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); } +__ai uint8x8_t vtst_p8(poly8x8_t __a, poly8x8_t __b) { + return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); } +__ai uint16x4_t vtst_p16(poly16x4_t __a, poly16x4_t __b) { + return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 
17); } +__ai uint8x16_t vtstq_s8(int8x16_t __a, int8x16_t __b) { + return (uint8x16_t)__builtin_neon_vtstq_v(__a, __b, 48); } +__ai uint16x8_t vtstq_s16(int16x8_t __a, int16x8_t __b) { + return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vtstq_s32(int32x4_t __a, int32x4_t __b) { + return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint8x16_t vtstq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vtstq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vtstq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai uint8x16_t vtstq_p8(poly8x16_t __a, poly8x16_t __b) { + return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vtstq_p16(poly16x8_t __a, poly16x8_t __b) { + return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); } + +__ai int8x8x2_t vuzp_s8(int8x8_t __a, int8x8_t __b) { + int8x8x2_t r; __builtin_neon_vuzp_v(&r, __a, __b, 0); return r; } +__ai int16x4x2_t vuzp_s16(int16x4_t __a, int16x4_t __b) { + int16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } +__ai int32x2x2_t vuzp_s32(int32x2_t __a, int32x2_t __b) { + int32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } +__ai uint8x8x2_t vuzp_u8(uint8x8_t __a, uint8x8_t __b) { + uint8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } +__ai uint16x4x2_t vuzp_u16(uint16x4_t __a, uint16x4_t __b) { + uint16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } +__ai uint32x2x2_t vuzp_u32(uint32x2_t __a, uint32x2_t __b) { + uint32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return 
r; } +__ai float32x2x2_t vuzp_f32(float32x2_t __a, float32x2_t __b) { + float32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } +__ai poly8x8x2_t vuzp_p8(poly8x8_t __a, poly8x8_t __b) { + poly8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } +__ai poly16x4x2_t vuzp_p16(poly16x4_t __a, poly16x4_t __b) { + poly16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } +__ai int8x16x2_t vuzpq_s8(int8x16_t __a, int8x16_t __b) { + int8x16x2_t r; __builtin_neon_vuzpq_v(&r, __a, __b, 32); return r; } +__ai int16x8x2_t vuzpq_s16(int16x8_t __a, int16x8_t __b) { + int16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } +__ai int32x4x2_t vuzpq_s32(int32x4_t __a, int32x4_t __b) { + int32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } +__ai uint8x16x2_t vuzpq_u8(uint8x16_t __a, uint8x16_t __b) { + uint8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } +__ai uint16x8x2_t vuzpq_u16(uint16x8_t __a, uint16x8_t __b) { + uint16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } +__ai uint32x4x2_t vuzpq_u32(uint32x4_t __a, uint32x4_t __b) { + uint32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; } +__ai float32x4x2_t vuzpq_f32(float32x4_t __a, float32x4_t __b) { + float32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } +__ai poly8x16x2_t vuzpq_p8(poly8x16_t __a, poly8x16_t __b) { + poly8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } +__ai poly16x8x2_t vuzpq_p16(poly16x8_t __a, poly16x8_t __b) { + poly16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } + +__ai int8x8x2_t vzip_s8(int8x8_t __a, int8x8_t __b) { + int8x8x2_t r; __builtin_neon_vzip_v(&r, __a, __b, 0); return r; } +__ai int16x4x2_t 
vzip_s16(int16x4_t __a, int16x4_t __b) { + int16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } +__ai int32x2x2_t vzip_s32(int32x2_t __a, int32x2_t __b) { + int32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } +__ai uint8x8x2_t vzip_u8(uint8x8_t __a, uint8x8_t __b) { + uint8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; } +__ai uint16x4x2_t vzip_u16(uint16x4_t __a, uint16x4_t __b) { + uint16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; } +__ai uint32x2x2_t vzip_u32(uint32x2_t __a, uint32x2_t __b) { + uint32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; } +__ai float32x2x2_t vzip_f32(float32x2_t __a, float32x2_t __b) { + float32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; } +__ai poly8x8x2_t vzip_p8(poly8x8_t __a, poly8x8_t __b) { + poly8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } +__ai poly16x4x2_t vzip_p16(poly16x4_t __a, poly16x4_t __b) { + poly16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } +__ai int8x16x2_t vzipq_s8(int8x16_t __a, int8x16_t __b) { + int8x16x2_t r; __builtin_neon_vzipq_v(&r, __a, __b, 32); return r; } +__ai int16x8x2_t vzipq_s16(int16x8_t __a, int16x8_t __b) { + int16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; } +__ai int32x4x2_t vzipq_s32(int32x4_t __a, int32x4_t __b) { + int32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; } +__ai uint8x16x2_t vzipq_u8(uint8x16_t __a, uint8x16_t __b) { + uint8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; } +__ai uint16x8x2_t vzipq_u16(uint16x8_t __a, uint16x8_t __b) { + uint16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; } +__ai uint32x4x2_t vzipq_u32(uint32x4_t __a, uint32x4_t __b) { 
+ uint32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; } +__ai float32x4x2_t vzipq_f32(float32x4_t __a, float32x4_t __b) { + float32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; } +__ai poly8x16x2_t vzipq_p8(poly8x16_t __a, poly8x16_t __b) { + poly8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; } +__ai poly16x8x2_t vzipq_p16(poly16x8_t __a, poly16x8_t __b) { + poly16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; } + +#ifdef __aarch64__ +__ai int16x8_t vmovl_high_s8(int8x16_t __a) { + int8x8_t __a1 = vget_high_s8(__a); + return (int16x8_t)vshll_n_s8(__a1, 0); } +__ai int32x4_t vmovl_high_s16(int16x8_t __a) { + int16x4_t __a1 = vget_high_s16(__a); + return (int32x4_t)vshll_n_s16(__a1, 0); } +__ai int64x2_t vmovl_high_s32(int32x4_t __a) { + int32x2_t __a1 = vget_high_s32(__a); + return (int64x2_t)vshll_n_s32(__a1, 0); } +__ai uint16x8_t vmovl_high_u8(uint8x16_t __a) { + uint8x8_t __a1 = vget_high_u8(__a); + return (uint16x8_t)vshll_n_u8(__a1, 0); } +__ai uint32x4_t vmovl_high_u16(uint16x8_t __a) { + uint16x4_t __a1 = vget_high_u16(__a); + return (uint32x4_t)vshll_n_u16(__a1, 0); } +__ai uint64x2_t vmovl_high_u32(uint32x4_t __a) { + uint32x2_t __a1 = vget_high_u32(__a); + return (uint64x2_t)vshll_n_u32(__a1, 0); } + +__ai int16x8_t vmull_high_s8(int8x16_t __a, int8x16_t __b) { + return vmull_s8(vget_high_s8(__a), vget_high_s8(__b)); } +__ai int32x4_t vmull_high_s16(int16x8_t __a, int16x8_t __b) { + return vmull_s16(vget_high_s16(__a), vget_high_s16(__b)); } +__ai int64x2_t vmull_high_s32(int32x4_t __a, int32x4_t __b) { + return vmull_s32(vget_high_s32(__a), vget_high_s32(__b)); } +__ai uint16x8_t vmull_high_u8(uint8x16_t __a, uint8x16_t __b) { + return vmull_u8(vget_high_u8(__a), vget_high_u8(__b)); } +__ai uint32x4_t vmull_high_u16(uint16x8_t __a, uint16x8_t __b) { + return vmull_u16(vget_high_u16(__a), 
vget_high_u16(__b)); } +__ai uint64x2_t vmull_high_u32(uint32x4_t __a, uint32x4_t __b) { + return vmull_u32(vget_high_u32(__a), vget_high_u32(__b)); } +__ai poly16x8_t vmull_high_p8(poly8x16_t __a, poly8x16_t __b) { + return vmull_p8(vget_high_p8(__a), vget_high_p8(__b)); } + +__ai int16x8_t vabdl_high_s8(int8x16_t __a, int8x16_t __b) { + return vabdl_s8(vget_high_s8(__a), vget_high_s8(__b)); } +__ai int32x4_t vabdl_high_s16(int16x8_t __a, int16x8_t __b) { + return vabdl_s16(vget_high_s16(__a), vget_high_s16(__b)); } +__ai int64x2_t vabdl_high_s32(int32x4_t __a, int32x4_t __b) { + return vabdl_s32(vget_high_s32(__a), vget_high_s32(__b)); } +__ai uint16x8_t vabdl_high_u8(uint8x16_t __a, uint8x16_t __b) { + return vabdl_u8(vget_high_u8(__a), vget_high_u8(__b)); } +__ai uint32x4_t vabdl_high_u16(uint16x8_t __a, uint16x8_t __b) { + return vabdl_u16(vget_high_u16(__a), vget_high_u16(__b)); } +__ai uint64x2_t vabdl_high_u32(uint32x4_t __a, uint32x4_t __b) { + return vabdl_u32(vget_high_u32(__a), vget_high_u32(__b)); } + +__ai float64x2_t vabdq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float64x2_t vaddq_f64(float64x2_t __a, float64x2_t __b) { + return __a + __b; } + +__ai int8x16_t vpaddq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vpaddq_v(__a, __b, 32); } +__ai int16x8_t vpaddq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vpaddq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vpaddq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vpaddq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t 
vpaddq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vpaddq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpaddq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vpaddq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai uint8x16_t vaesdq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vaesdq_v((int8x16_t)__a, (int8x16_t)__b, 48); } + +__ai uint8x16_t vaeseq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vaeseq_v((int8x16_t)__a, (int8x16_t)__b, 48); } + +__ai uint8x16_t vaesimcq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_neon_vaesimcq_v((int8x16_t)__a, 48); } + +__ai uint8x16_t vaesmcq_u8(uint8x16_t __a) { + return (uint8x16_t)__builtin_neon_vaesmcq_v((int8x16_t)__a, 48); } + +__ai float64x2_t vbslq_f64(uint64x2_t __a, float64x2_t __b, float64x2_t __c) { + return (float64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 40); } + +__ai uint64x2_t vceqq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a == __b); } +__ai uint64x2_t vceqq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a == __b); } +__ai uint64x2_t vceqq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a == __b); } + +__ai uint64x2_t vcgeq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a >= __b); } +__ai uint64x2_t vcgeq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a >= __b); } +__ai uint64x2_t vcgeq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a >= __b); } + +__ai uint64x2_t vcgtq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a > __b); } +__ai uint64x2_t vcgtq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a > __b); } +__ai uint64x2_t vcgtq_f64(float64x2_t __a, float64x2_t __b) { + return 
(uint64x2_t)(__a > __b); } + +__ai uint64x2_t vcleq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a <= __b); } +__ai uint64x2_t vcleq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a <= __b); } +__ai uint64x2_t vcleq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a <= __b); } + +__ai uint64x2_t vcltq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)(__a < __b); } +__ai uint64x2_t vcltq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)(__a < __b); } +__ai uint64x2_t vcltq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)(__a < __b); } + +__ai uint64x2_t vtstq_s64(int64x2_t __a, int64x2_t __b) { + return (uint64x2_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 51); } +__ai uint64x2_t vtstq_u64(uint64x2_t __a, uint64x2_t __b) { + return (uint64x2_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +#define vcopyq_lane_s8(a1, __b1, c1, __d1) __extension__ ({ \ + int8x16_t __a1 = (a1); int8x8_t __c1 = (c1); \ + int8_t __c2 = vget_lane_s8(__c1, __d1); \ + vsetq_lane_s8(__c2, __a1, __b1); }) +#define vcopyq_lane_s16(a1, __b1, c1, __d1) __extension__ ({ \ + int16x8_t __a1 = (a1); int16x4_t __c1 = (c1); \ + int16_t __c2 = vget_lane_s16(__c1, __d1); \ + vsetq_lane_s16(__c2, __a1, __b1); }) +#define vcopyq_lane_s32(a1, __b1, c1, __d1) __extension__ ({ \ + int32x4_t __a1 = (a1); int32x2_t __c1 = (c1); \ + int32_t __c2 = vget_lane_s32(__c1, __d1); \ + vsetq_lane_s32(__c2, __a1, __b1); }) +#define vcopyq_lane_s64(a1, __b1, c1, __d1) __extension__ ({ \ + int64x2_t __a1 = (a1); int64x1_t __c1 = (c1); \ + int64_t __c2 = vget_lane_s64(__c1, __d1); \ + vsetq_lane_s64(__c2, __a1, __b1); }) +#define vcopyq_lane_u8(a1, __b1, c1, __d1) __extension__ ({ \ + uint8x16_t __a1 = (a1); uint8x8_t __c1 = (c1); \ + uint8_t __c2 = vget_lane_u8(__c1, __d1); \ + vsetq_lane_u8(__c2, __a1, __b1); }) +#define vcopyq_lane_u16(a1, __b1, c1, __d1) __extension__ ({ \ + uint16x8_t __a1 = (a1); uint16x4_t __c1 
= (c1); \ + uint16_t __c2 = vget_lane_u16(__c1, __d1); \ + vsetq_lane_u16(__c2, __a1, __b1); }) +#define vcopyq_lane_u32(a1, __b1, c1, __d1) __extension__ ({ \ + uint32x4_t __a1 = (a1); uint32x2_t __c1 = (c1); \ + uint32_t __c2 = vget_lane_u32(__c1, __d1); \ + vsetq_lane_u32(__c2, __a1, __b1); }) +#define vcopyq_lane_u64(a1, __b1, c1, __d1) __extension__ ({ \ + uint64x2_t __a1 = (a1); uint64x1_t __c1 = (c1); \ + uint64_t __c2 = vget_lane_u64(__c1, __d1); \ + vsetq_lane_u64(__c2, __a1, __b1); }) +#define vcopyq_lane_p8(a1, __b1, c1, __d1) __extension__ ({ \ + poly8x16_t __a1 = (a1); poly8x8_t __c1 = (c1); \ + poly8_t __c2 = vget_lane_p8(__c1, __d1); \ + vsetq_lane_p8(__c2, __a1, __b1); }) +#define vcopyq_lane_p16(a1, __b1, c1, __d1) __extension__ ({ \ + poly16x8_t __a1 = (a1); poly16x4_t __c1 = (c1); \ + poly16_t __c2 = vget_lane_p16(__c1, __d1); \ + vsetq_lane_p16(__c2, __a1, __b1); }) +#define vcopyq_lane_f32(a1, __b1, c1, __d1) __extension__ ({ \ + float32x4_t __a1 = (a1); float32x2_t __c1 = (c1); \ + float32_t __c2 = vget_lane_f32(__c1, __d1); \ + vsetq_lane_f32(__c2, __a1, __b1); }) +#define vcopyq_lane_f64(a1, __b1, c1, __d1) __extension__ ({ \ + float64x2_t __a1 = (a1); float64x1_t __c1 = (c1); \ + float64_t __c2 = vget_lane_f64(__c1, __d1); \ + vsetq_lane_f64(__c2, __a1, __b1); }) + +#define vcopyq_laneq_s8(a1, __b1, c1, __d1) __extension__ ({ \ + int8x16_t __a1 = (a1); int8x16_t __c1 = (c1); \ + int8_t __c2 = vgetq_lane_s8(__c1, __d1); \ + vsetq_lane_s8(__c2, __a1, __b1); }) +#define vcopyq_laneq_s16(a1, __b1, c1, __d1) __extension__ ({ \ + int16x8_t __a1 = (a1); int16x8_t __c1 = (c1); \ + int16_t __c2 = vgetq_lane_s16(__c1, __d1); \ + vsetq_lane_s16(__c2, __a1, __b1); }) +#define vcopyq_laneq_s32(a1, __b1, c1, __d1) __extension__ ({ \ + int32x4_t __a1 = (a1); int32x4_t __c1 = (c1); \ + int32_t __c2 = vgetq_lane_s32(__c1, __d1); \ + vsetq_lane_s32(__c2, __a1, __b1); }) +#define vcopyq_laneq_s64(a1, __b1, c1, __d1) __extension__ ({ \ + int64x2_t __a1 = (a1); 
int64x2_t __c1 = (c1); \ + int64_t __c2 = vgetq_lane_s64(__c1, __d1); \ + vsetq_lane_s64(__c2, __a1, __b1); }) +#define vcopyq_laneq_u8(a1, __b1, c1, __d1) __extension__ ({ /* pattern shared by the vcopy* macros: fetch element __d1 of c1 with a get_lane call, then hand it to a set_lane call together with a1 and __b1; the set_lane result is the value of the expression */ \ + uint8x16_t __a1 = (a1); uint8x16_t __c1 = (c1); \ + uint8_t __c2 = vgetq_lane_u8(__c1, __d1); \ + vsetq_lane_u8(__c2, __a1, __b1); }) +#define vcopyq_laneq_u16(a1, __b1, c1, __d1) __extension__ ({ \ + uint16x8_t __a1 = (a1); uint16x8_t __c1 = (c1); \ + uint16_t __c2 = vgetq_lane_u16(__c1, __d1); \ + vsetq_lane_u16(__c2, __a1, __b1); }) +#define vcopyq_laneq_u32(a1, __b1, c1, __d1) __extension__ ({ \ + uint32x4_t __a1 = (a1); uint32x4_t __c1 = (c1); \ + uint32_t __c2 = vgetq_lane_u32(__c1, __d1); \ + vsetq_lane_u32(__c2, __a1, __b1); }) +#define vcopyq_laneq_u64(a1, __b1, c1, __d1) __extension__ ({ \ + uint64x2_t __a1 = (a1); uint64x2_t __c1 = (c1); \ + uint64_t __c2 = vgetq_lane_u64(__c1, __d1); \ + vsetq_lane_u64(__c2, __a1, __b1); }) +#define vcopyq_laneq_p8(a1, __b1, c1, __d1) __extension__ ({ \ + poly8x16_t __a1 = (a1); poly8x16_t __c1 = (c1); \ + poly8_t __c2 = vgetq_lane_p8(__c1, __d1); \ + vsetq_lane_p8(__c2, __a1, __b1); }) +#define vcopyq_laneq_p16(a1, __b1, c1, __d1) __extension__ ({ \ + poly16x8_t __a1 = (a1); poly16x8_t __c1 = (c1); \ + poly16_t __c2 = vgetq_lane_p16(__c1, __d1); \ + vsetq_lane_p16(__c2, __a1, __b1); }) +#define vcopyq_laneq_f32(a1, __b1, c1, __d1) __extension__ ({ \ + float32x4_t __a1 = (a1); float32x4_t __c1 = (c1); \ + float32_t __c2 = vgetq_lane_f32(__c1, __d1); \ + vsetq_lane_f32(__c2, __a1, __b1); }) +#define vcopy_laneq_f64(a1, __b1, c1, __d1) __extension__ ({ /* NOTE(review): despite the "laneq" name, c1 is declared float64x1_t and read with vget_lane_f64, while the vcopyq_laneq_* macros before it declare a q vector source (e.g. float32x4_t) and use vgetq_lane_*; confirm whether float64x2_t / vgetq_lane_f64 was intended before relying on this macro */ \ + float64x1_t __a1 = (a1); float64x1_t __c1 = (c1); \ + float64_t __c2 = vget_lane_f64(__c1, __d1); \ + vset_lane_f64(__c2, __a1, __b1); }) + +#define vcopy_lane_s8(a1, __b1, c1, __d1) __extension__ ({ \ + int8x8_t __a1 = (a1); int8x8_t __c1 = (c1); \ + int8_t __c2 = vget_lane_s8(__c1, __d1); \ + vset_lane_s8(__c2, __a1, __b1); }) +#define vcopy_lane_s16(a1, __b1, c1, __d1) __extension__ ({ \ + 
int16x4_t __a1 = (a1); int16x4_t __c1 = (c1); \ + int16_t __c2 = vget_lane_s16(__c1, __d1); \ + vset_lane_s16(__c2, __a1, __b1); }) +#define vcopy_lane_s32(a1, __b1, c1, __d1) __extension__ ({ \ + int32x2_t __a1 = (a1); int32x2_t __c1 = (c1); \ + int32_t __c2 = vget_lane_s32(__c1, __d1); \ + vset_lane_s32(__c2, __a1, __b1); }) +#define vcopy_lane_p8(a1, __b1, c1, __d1) __extension__ ({ \ + poly8x8_t __a1 = (a1); poly8x8_t __c1 = (c1); \ + poly8_t __c2 = vget_lane_p8(__c1, __d1); \ + vset_lane_p8(__c2, __a1, __b1); }) +#define vcopy_lane_p16(a1, __b1, c1, __d1) __extension__ ({ \ + poly16x4_t __a1 = (a1); poly16x4_t __c1 = (c1); \ + poly16_t __c2 = vget_lane_p16(__c1, __d1); \ + vset_lane_p16(__c2, __a1, __b1); }) +#define vcopy_lane_u8(a1, __b1, c1, __d1) __extension__ ({ \ + uint8x8_t __a1 = (a1); uint8x8_t __c1 = (c1); \ + uint8_t __c2 = vget_lane_u8(__c1, __d1); \ + vset_lane_u8(__c2, __a1, __b1); }) +#define vcopy_lane_u16(a1, __b1, c1, __d1) __extension__ ({ \ + uint16x4_t __a1 = (a1); uint16x4_t __c1 = (c1); \ + uint16_t __c2 = vget_lane_u16(__c1, __d1); \ + vset_lane_u16(__c2, __a1, __b1); }) +#define vcopy_lane_u32(a1, __b1, c1, __d1) __extension__ ({ \ + uint32x2_t __a1 = (a1); uint32x2_t __c1 = (c1); \ + uint32_t __c2 = vget_lane_u32(__c1, __d1); \ + vset_lane_u32(__c2, __a1, __b1); }) +#define vcopy_lane_f32(a1, __b1, c1, __d1) __extension__ ({ \ + float32x2_t __a1 = (a1); float32x2_t __c1 = (c1); \ + float32_t __c2 = vget_lane_f32(__c1, __d1); \ + vset_lane_f32(__c2, __a1, __b1); }) + +#define vcopy_laneq_s8(a1, __b1, c1, __d1) __extension__ ({ \ + int8x8_t __a1 = (a1); int8x16_t __c1 = (c1); \ + int8_t __c2 = vgetq_lane_s8(__c1, __d1); \ + vset_lane_s8(__c2, __a1, __b1); }) +#define vcopy_laneq_s16(a1, __b1, c1, __d1) __extension__ ({ \ + int16x4_t __a1 = (a1); int16x8_t __c1 = (c1); \ + int16_t __c2 = vgetq_lane_s16(__c1, __d1); \ + vset_lane_s16(__c2, __a1, __b1); }) +#define vcopy_laneq_s32(a1, __b1, c1, __d1) __extension__ ({ \ + int32x2_t __a1 = 
(a1); int32x4_t __c1 = (c1); \ + int32_t __c2 = vgetq_lane_s32(__c1, __d1); \ + vset_lane_s32(__c2, __a1, __b1); }) +#define vcopy_laneq_p8(a1, __b1, c1, __d1) __extension__ ({ \ + poly8x8_t __a1 = (a1); poly8x16_t __c1 = (c1); \ + poly8_t __c2 = vgetq_lane_p8(__c1, __d1); \ + vset_lane_p8(__c2, __a1, __b1); }) +#define vcopy_laneq_p16(a1, __b1, c1, __d1) __extension__ ({ \ + poly16x4_t __a1 = (a1); poly16x8_t __c1 = (c1); \ + poly16_t __c2 = vgetq_lane_p16(__c1, __d1); \ + vset_lane_p16(__c2, __a1, __b1); }) +#define vcopy_laneq_u8(a1, __b1, c1, __d1) __extension__ ({ \ + uint8x8_t __a1 = (a1); uint8x16_t __c1 = (c1); \ + uint8_t __c2 = vgetq_lane_u8(__c1, __d1); \ + vset_lane_u8(__c2, __a1, __b1); }) +#define vcopy_laneq_u16(a1, __b1, c1, __d1) __extension__ ({ \ + uint16x4_t __a1 = (a1); uint16x8_t __c1 = (c1); \ + uint16_t __c2 = vgetq_lane_u16(__c1, __d1); \ + vset_lane_u16(__c2, __a1, __b1); }) +#define vcopy_laneq_u32(a1, __b1, c1, __d1) __extension__ ({ \ + uint32x2_t __a1 = (a1); uint32x4_t __c1 = (c1); \ + uint32_t __c2 = vgetq_lane_u32(__c1, __d1); \ + vset_lane_u32(__c2, __a1, __b1); }) +#define vcopy_laneq_f32(a1, __b1, c1, __d1) __extension__ ({ \ + float32x2_t __a1 = (a1); float32x4_t __c1 = (c1); \ + float32_t __c2 = vgetq_lane_f32(__c1, __d1); \ + vset_lane_f32(__c2, __a1, __b1); }) + +__ai float64x1_t vcreate_f64(uint64_t __a) { + return (float64x1_t)__a; } + +#define vcvtq_n_f64_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + (float64x2_t)__builtin_neon_vcvtq_n_f64_v((int8x16_t)__a, __b, 35); }) /* f64 conversion: result vector is float64x2_t, not float32x4_t */ +#define vcvtq_n_f64_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + (float64x2_t)__builtin_neon_vcvtq_n_f64_v((int8x16_t)__a, __b, 51); }) /* f64 conversion: result vector is float64x2_t, not float32x4_t */ + +__ai float64x1_t vdup_n_f64(float64_t __a) { + return (float64x1_t){ __a }; } +__ai float64x2_t vdupq_n_f64(float64_t __a) { + return (float64x2_t){ __a, __a }; } + +__ai uint64x2_t vcageq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)__builtin_neon_vcageq_v((int8x16_t)__a, 
(int8x16_t)__b, 51); } + +__ai uint64x2_t vcagtq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai uint64x2_t vcaleq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +__ai uint64x2_t vcaltq_f64(float64x2_t __a, float64x2_t __b) { + return (uint64x2_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 51); } + +#define vcvtq_n_s64_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + (int64x2_t)__builtin_neon_vcvtq_n_s64_v((int8x16_t)__a, __b, 35); }) + +#define vcvtq_n_u64_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + (uint64x2_t)__builtin_neon_vcvtq_n_u64_v((int8x16_t)__a, __b, 51); }) + +__ai float32x2_t vdiv_f32(float32x2_t __a, float32x2_t __b) { + return __a / __b; } +__ai float32x4_t vdivq_f32(float32x4_t __a, float32x4_t __b) { + return __a / __b; } +__ai float64x2_t vdivq_f64(float64x2_t __a, float64x2_t __b) { + return __a / __b; } + +__ai float32x2_t vmaxnm_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vmaxnm_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vmaxnmq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vmaxnmq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float32x2_t vpmaxnm_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vpmaxnm_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vpmaxnmq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vpmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpmaxnmq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vpmaxnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float32_t vmaxnmvq_f32(float32x4_t __a) { 
+ return (float32_t)__builtin_neon_vmaxnmvq_f32(__a); } + +__ai float32x2_t vminnm_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vminnm_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vminnmq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vminnmq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vminnmq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vminnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float32x2_t vpminnm_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vpminnm_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float32x4_t vpminnmq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vpminnmq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpminnmq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vpminnmq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float32_t vminnmvq_f32(float32x4_t __a) { + return (float32_t)__builtin_neon_vminnmvq_f32(__a); } + +__ai float64x2_t vfmaq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) { + return (float64x2_t)__builtin_neon_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 40); } + +__ai float32x2_t vfms_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { + return (float32x2_t)__builtin_neon_vfms_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 7); } +__ai float32x4_t vfmsq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) { + return (float32x4_t)__builtin_neon_vfmsq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 39); } +__ai float64x2_t vfmsq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) { + return (float64x2_t)__builtin_neon_vfmsq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 40); } + +__ai float64x2_t vrecpsq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float64x2_t vrsqrtsq_f64(float64x2_t __a, 
float64x2_t __b) { + return (float64x2_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +#define vget_lane_f64(a, __b) __extension__ ({ \ + float64x1_t __a = (a); \ + (float64_t)__builtin_neon_vget_lane_f64(__a, __b); }) +#define vgetq_lane_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + (float64_t)__builtin_neon_vgetq_lane_f64(__a, __b); }) + +#define vld1q_f64(__a) __extension__ ({ \ + (float64x2_t)__builtin_neon_vld1q_v(__a, 40); }) +#define vld1_f64(__a) __extension__ ({ \ + (float64x1_t)__builtin_neon_vld1_v(__a, 8); }) + +#define vld2q_u64(__a) __extension__ ({ \ + uint64x2x2_t r; __builtin_neon_vld2q_v(&r, __a, 51); r; }) +#define vld2q_s64(__a) __extension__ ({ \ + int64x2x2_t r; __builtin_neon_vld2q_v(&r, __a, 35); r; }) +#define vld2q_f64(__a) __extension__ ({ \ + float64x2x2_t r; __builtin_neon_vld2q_v(&r, __a, 40); r; }) +#define vld2_f64(__a) __extension__ ({ \ + float64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 8); r; }) + +#define vld3q_u64(__a) __extension__ ({ \ + uint64x2x3_t r; __builtin_neon_vld3q_v(&r, __a, 51); r; }) +#define vld3q_s64(__a) __extension__ ({ \ + int64x2x3_t r; __builtin_neon_vld3q_v(&r, __a, 35); r; }) +#define vld3q_f64(__a) __extension__ ({ \ + float64x2x3_t r; __builtin_neon_vld3q_v(&r, __a, 40); r; }) +#define vld3_f64(__a) __extension__ ({ \ + float64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 8); r; }) + +#define vld4q_u64(__a) __extension__ ({ \ + uint64x2x4_t r; __builtin_neon_vld4q_v(&r, __a, 51); r; }) +#define vld4q_s64(__a) __extension__ ({ \ + int64x2x4_t r; __builtin_neon_vld4q_v(&r, __a, 35); r; }) +#define vld4q_f64(__a) __extension__ ({ \ + float64x2x4_t r; __builtin_neon_vld4q_v(&r, __a, 40); r; }) +#define vld4_f64(__a) __extension__ ({ \ + float64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 8); r; }) + +__ai float64x2_t vmaxq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x16_t vpmaxq_s8(int8x16_t 
__a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vpmaxq_v(__a, __b, 32); } +__ai int16x8_t vpmaxq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vpmaxq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vpmaxq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vpmaxq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vpmaxq_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vpmaxq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpmaxq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vpmaxq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float64x2_t vminq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai int8x16_t vpminq_s8(int8x16_t __a, int8x16_t __b) { + return (int8x16_t)__builtin_neon_vpminq_v(__a, __b, 32); } +__ai int16x8_t vpminq_s16(int16x8_t __a, int16x8_t __b) { + return (int16x8_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 33); } +__ai int32x4_t vpminq_s32(int32x4_t __a, int32x4_t __b) { + return (int32x4_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 34); } +__ai uint8x16_t vpminq_u8(uint8x16_t __a, uint8x16_t __b) { + return (uint8x16_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 48); } +__ai uint16x8_t vpminq_u16(uint16x8_t __a, uint16x8_t __b) { + return (uint16x8_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 49); } +__ai uint32x4_t vpminq_u32(uint32x4_t __a, 
uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 50); } +__ai float32x4_t vpminq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vpminq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vpminq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +__ai float64x2_t vmlaq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) { + return __a + (__b * __c); } + +__ai float64x2_t vmlsq_f64(float64x2_t __a, float64x2_t __b, float64x2_t __c) { + return __a - (__b * __c); } + +__ai float64x1_t vmov_n_f64(float64_t __a) { + return (float64x1_t){ __a }; } +__ai float64x2_t vmovq_n_f64(float64_t __a) { + return (float64x2_t){ __a, __a }; } + +__ai float64x2_t vmulq_f64(float64x2_t __a, float64x2_t __b) { + return __a * __b; } + +__ai float32x2_t vmulx_f32(float32x2_t __a, float32x2_t __b) { + return (float32x2_t)__builtin_neon_vmulx_v((int8x8_t)__a, (int8x8_t)__b, 7); } +__ai float64x1_t vmulx_f64(float64x1_t __a, float64x1_t __b) { + return (float64x1_t)__builtin_neon_vmulx_v((int8x8_t)__a, (int8x8_t)__b, 8); } +__ai float32x4_t vmulxq_f32(float32x4_t __a, float32x4_t __b) { + return (float32x4_t)__builtin_neon_vmulxq_v((int8x16_t)__a, (int8x16_t)__b, 39); } +__ai float64x2_t vmulxq_f64(float64x2_t __a, float64x2_t __b) { + return (float64x2_t)__builtin_neon_vmulxq_v((int8x16_t)__a, (int8x16_t)__b, 40); } + +#define vqrshrn_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)vcombine_s16(__a, vqrshrn_n_s16(__b, __c)); }) +#define vqrshrn_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)vcombine_s32(__a, vqrshrn_n_s32(__b, __c)); }) +#define vqrshrn_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)vcombine_s64(__a, vqrshrn_n_s64(__b, __c)); }) +#define vqrshrn_high_n_u16(a, 
b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint16x8_t __b = (b); \ + (uint8x16_t)vcombine_u16(__a, vqrshrn_n_u16(__b, __c)); }) +#define vqrshrn_high_n_u32(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint32x4_t __b = (b); \ + (uint16x8_t)vcombine_u32(__a, vqrshrn_n_u32(__b, __c)); }) +#define vqrshrn_high_n_u64(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint64x2_t __b = (b); \ + (uint32x4_t)vcombine_u64(__a, vqrshrn_n_u64(__b, __c)); }) + +#define vqrshrun_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)vcombine_s16(__a, vqrshrun_n_s16(__b, __c)); }) +#define vqrshrun_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)vcombine_s32(__a, vqrshrun_n_s32(__b, __c)); }) +#define vqrshrun_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)vcombine_s64(__a, vqrshrun_n_s64(__b, __c)); }) + +#define vqshrn_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)vcombine_s16(__a, vqshrn_n_s16(__b, __c)); }) +#define vqshrn_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)vcombine_s32(__a, vqshrn_n_s32(__b, __c)); }) +#define vqshrn_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)vcombine_s64(__a, vqshrn_n_s64(__b, __c)); }) +#define vqshrn_high_n_u16(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint16x8_t __b = (b); \ + (uint8x16_t)vcombine_u16(__a, vqshrn_n_u16(__b, __c)); }) +#define vqshrn_high_n_u32(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint32x4_t __b = (b); \ + (uint16x8_t)vcombine_u32(__a, vqshrn_n_u32(__b, __c)); }) +#define vqshrn_high_n_u64(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint64x2_t __b = (b); \ + (uint32x4_t)vcombine_u64(__a, vqshrn_n_u64(__b, __c)); }) + +#define vqshrun_high_n_s16(a, b, __c) 
__extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)vcombine_s16(__a, vqshrun_n_s16(__b, __c)); }) +#define vqshrun_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)vcombine_s32(__a, vqshrun_n_s32(__b, __c)); }) +#define vqshrun_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)vcombine_s64(__a, vqshrun_n_s64(__b, __c)); }) + +#define vrshrn_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)vcombine_s16(__a, vrshrn_n_s16(__b, __c)); }) +#define vrshrn_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)vcombine_s32(__a, vrshrn_n_s32(__b, __c)); }) +#define vrshrn_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)vcombine_s64(__a, vrshrn_n_s64(__b, __c)); }) +#define vrshrn_high_n_u16(a, b, __c) __extension__ ({ \ + uint8x8_t __a = (a); uint16x8_t __b = (b); \ + (uint8x16_t)vcombine_u16(__a, vrshrn_n_u16(__b, __c)); }) +#define vrshrn_high_n_u32(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint32x4_t __b = (b); \ + (uint16x8_t)vcombine_u32(__a, vrshrn_n_u32(__b, __c)); }) +#define vrshrn_high_n_u64(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint64x2_t __b = (b); \ + (uint32x4_t)vcombine_u64(__a, vrshrn_n_u64(__b, __c)); }) + +__ai int64_t vabsd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vabsd_s64(__a); } + +__ai int64_t vaddd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vaddd_s64(__a, __b); } +__ai uint64_t vaddd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vaddd_u64(__a, __b); } + +__ai float32_t vpadds_f32(float32x2_t __a) { + return (float32_t)__builtin_neon_vpadds_f32(__a); } +__ai int64_t vpaddd_s64(int64x2_t __a) { + return (int64_t)__builtin_neon_vpaddd_s64(__a); } +__ai float64_t vpaddd_f64(float64x2_t __a) { + return 
(float64_t)__builtin_neon_vpaddd_f64(__a); } + +__ai int64_t vceqd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vceqd_s64(__a, __b); } +__ai uint64_t vceqd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vceqd_u64(__a, __b); } + +__ai int64_t vceqzd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vceqzd_s64(__a); } +__ai uint64_t vceqzd_u64(uint64_t __a) { + return (uint64_t)__builtin_neon_vceqzd_u64(__a); } + +__ai int64_t vcged_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vcged_s64(__a, __b); } + +__ai int64_t vcgezd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vcgezd_s64(__a); } + +__ai int64_t vcgtd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vcgtd_s64(__a, __b); } + +__ai int64_t vcgtzd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vcgtzd_s64(__a); } + +__ai uint64_t vcgtd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vcgtd_u64(__a, __b); } + +__ai uint64_t vcged_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vcged_u64(__a, __b); } + +__ai int64_t vcled_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vcled_s64(__a, __b); } +__ai uint64_t vcled_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vcled_u64(__a, __b); } + +__ai int64_t vclezd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vclezd_s64(__a); } + +__ai int64_t vcltd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vcltd_s64(__a, __b); } +__ai uint64_t vcltd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vcltd_u64(__a, __b); } + +__ai int64_t vcltzd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vcltzd_s64(__a); } + +__ai int64_t vtstd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vtstd_s64(__a, __b); } +__ai uint64_t vtstd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vtstd_u64(__a, __b); } + +__ai uint32_t vcages_f32(float32_t __a, float32_t 
__b) { + return (uint32_t)__builtin_neon_vcages_f32(__a, __b); } +__ai uint64_t vcaged_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vcaged_f64(__a, __b); } + +__ai uint32_t vcagts_f32(float32_t __a, float32_t __b) { + return (uint32_t)__builtin_neon_vcagts_f32(__a, __b); } +__ai uint64_t vcagtd_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vcagtd_f64(__a, __b); } + +__ai uint32_t vcales_f32(float32_t __a, float32_t __b) { + return (uint32_t)__builtin_neon_vcales_f32(__a, __b); } +__ai uint64_t vcaled_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vcaled_f64(__a, __b); } + +__ai uint32_t vcalts_f32(float32_t __a, float32_t __b) { + return (uint32_t)__builtin_neon_vcalts_f32(__a, __b); } +__ai uint64_t vcaltd_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vcaltd_f64(__a, __b); } + +__ai uint32_t vceqs_f32(float32_t __a, float32_t __b) { + return (uint32_t)__builtin_neon_vceqs_f32(__a, __b); } +__ai uint64_t vceqd_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vceqd_f64(__a, __b); } + +__ai uint32_t vceqzs_f32(float32_t __a) { + return (uint32_t)__builtin_neon_vceqzs_f32(__a); } +__ai uint64_t vceqzd_f64(float64_t __a) { + return (uint64_t)__builtin_neon_vceqzd_f64(__a); } + +__ai uint32_t vcges_f32(float32_t __a, float32_t __b) { + return (uint32_t)__builtin_neon_vcges_f32(__a, __b); } +__ai uint64_t vcged_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vcged_f64(__a, __b); } + +__ai uint32_t vcgezs_f32(float32_t __a) { + return (uint32_t)__builtin_neon_vcgezs_f32(__a); } +__ai uint64_t vcgezd_f64(float64_t __a) { + return (uint64_t)__builtin_neon_vcgezd_f64(__a); } + +__ai uint32_t vcgts_f32(float32_t __a, float32_t __b) { + return (uint32_t)__builtin_neon_vcgts_f32(__a, __b); } +__ai uint64_t vcgtd_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vcgtd_f64(__a, __b); } + +__ai uint32_t vcgtzs_f32(float32_t 
__a) { + return (uint32_t)__builtin_neon_vcgtzs_f32(__a); } +__ai uint64_t vcgtzd_f64(float64_t __a) { + return (uint64_t)__builtin_neon_vcgtzd_f64(__a); } + +__ai uint32_t vcles_f32(float32_t __a, float32_t __b) { + return (uint32_t)__builtin_neon_vcles_f32(__a, __b); } +__ai uint64_t vcled_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vcled_f64(__a, __b); } + +__ai uint32_t vclezs_f32(float32_t __a) { + return (uint32_t)__builtin_neon_vclezs_f32(__a); } +__ai uint64_t vclezd_f64(float64_t __a) { + return (uint64_t)__builtin_neon_vclezd_f64(__a); } + +__ai uint32_t vclts_f32(float32_t __a, float32_t __b) { + return (uint32_t)__builtin_neon_vclts_f32(__a, __b); } +__ai uint64_t vcltd_f64(float64_t __a, float64_t __b) { + return (uint64_t)__builtin_neon_vcltd_f64(__a, __b); } + +__ai uint32_t vcltzs_f32(float32_t __a) { + return (uint32_t)__builtin_neon_vcltzs_f32(__a); } +__ai uint64_t vcltzd_f64(float64_t __a) { + return (uint64_t)__builtin_neon_vcltzd_f64(__a); } + +#define vcvts_n_s32_f32(a, __b) __extension__ ({ \ + float32_t __a = (a); \ + (int32_t)__builtin_neon_vcvts_n_s32_f32(__a, __b); }) /* scalar form: result cast is int32_t, not the int32x2_t vector */ + +#define vcvtd_n_s64_f64(a, __b) __extension__ ({ \ + float64_t __a = (a); \ + (int64_t)__builtin_neon_vcvtd_n_s64_f64(__a, __b); }) /* scalar form: result cast is int64_t, not the int64x1_t vector */ + +#define vcvts_n_u32_f32(a, __b) __extension__ ({ \ + float32_t __a = (a); \ + (uint32_t)__builtin_neon_vcvts_n_u32_f32(__a, __b); }) /* scalar form: result cast is uint32_t, not the uint32x2_t vector */ + +#define vcvtd_n_u64_f64(a, __b) __extension__ ({ \ + float64_t __a = (a); \ + (uint64_t)__builtin_neon_vcvtd_n_u64_f64(__a, __b); }) /* scalar form: result cast is uint64_t, not the uint64x1_t vector */ + +__ai float32_t vpmaxnms_f32(float32x2_t __a) { + return (float32_t)__builtin_neon_vpmaxnms_f32(__a); } +__ai float64_t vpmaxnmqd_f64(float64x2_t __a) { + return (float64_t)__builtin_neon_vpmaxnmqd_f64(__a); } + +__ai float32_t vpmaxs_f32(float32x2_t __a) { + return (float32_t)__builtin_neon_vpmaxs_f32(__a); } +__ai float64_t vpmaxqd_f64(float64x2_t __a) { + return (float64_t)__builtin_neon_vpmaxqd_f64(__a); } + +__ai float32_t 
vpminnms_f32(float32x2_t __a) { + return (float32_t)__builtin_neon_vpminnms_f32(__a); } +__ai float64_t vpminnmqd_f64(float64x2_t __a) { + return (float64_t)__builtin_neon_vpminnmqd_f64(__a); } + +__ai float32_t vpmins_f32(float32x2_t __a) { + return (float32_t)__builtin_neon_vpmins_f32(__a); } +__ai float64_t vpminqd_f64(float64x2_t __a) { + return (float64_t)__builtin_neon_vpminqd_f64(__a); } + +__ai float32_t vmulxs_f32(float32_t __a, float32_t __b) { + return (float32_t)__builtin_neon_vmulxs_f32(__a, __b); } +__ai float64_t vmulxd_f64(float64_t __a, float64_t __b) { + return (float64_t)__builtin_neon_vmulxd_f64(__a, __b); } + +__ai float32_t vrecpes_f32(float32_t __a) { + return (float32_t)__builtin_neon_vrecpes_f32(__a); } +__ai float64_t vrecped_f64(float64_t __a) { + return (float64_t)__builtin_neon_vrecped_f64(__a); } + +__ai float32_t vrecpss_f32(float32_t __a, float32_t __b) { + return (float32_t)__builtin_neon_vrecpss_f32(__a, __b); } +__ai float64_t vrecpsd_f64(float64_t __a, float64_t __b) { + return (float64_t)__builtin_neon_vrecpsd_f64(__a, __b); } + +__ai float32_t vrecpxs_f32(float32_t __a) { + return (float32_t)__builtin_neon_vrecpxs_f32(__a); } +__ai float64_t vrecpxd_f64(float64_t __a) { + return (float64_t)__builtin_neon_vrecpxd_f64(__a); } + +__ai float32_t vrsqrtes_f32(float32_t __a) { + return (float32_t)__builtin_neon_vrsqrtes_f32(__a); } +__ai float64_t vrsqrted_f64(float64_t __a) { + return (float64_t)__builtin_neon_vrsqrted_f64(__a); } + +__ai float32_t vrsqrtss_f32(float32_t __a, float32_t __b) { + return (float32_t)__builtin_neon_vrsqrtss_f32(__a, __b); } +__ai float64_t vrsqrtsd_f64(float64_t __a, float64_t __b) { + return (float64_t)__builtin_neon_vrsqrtsd_f64(__a, __b); } + +__ai int64_t vnegd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vnegd_s64(__a); } + +__ai int8_t vqaddb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_neon_vqaddb_s8(__a, __b); } +__ai int16_t vqaddh_s16(int16_t __a, int16_t __b) { + return 
(int16_t)__builtin_neon_vqaddh_s16(__a, __b); } +__ai int32_t vqadds_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_neon_vqadds_s32(__a, __b); } +__ai int64_t vqaddd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vqaddd_s64(__a, __b); } +__ai uint8_t vqaddb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_neon_vqaddb_u8(__a, __b); } +__ai uint16_t vqaddh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_neon_vqaddh_u16(__a, __b); } +__ai uint32_t vqadds_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_neon_vqadds_u32(__a, __b); } +__ai uint64_t vqaddd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vqaddd_u64(__a, __b); } + +__ai int8_t vqrshlb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_neon_vqrshlb_s8(__a, __b); } +__ai int16_t vqrshlh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_neon_vqrshlh_s16(__a, __b); } +__ai int32_t vqrshls_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_neon_vqrshls_s32(__a, __b); } +__ai int64_t vqrshld_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vqrshld_s64(__a, __b); } +__ai uint8_t vqrshlb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_neon_vqrshlb_u8(__a, __b); } +__ai uint16_t vqrshlh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_neon_vqrshlh_u16(__a, __b); } +__ai uint32_t vqrshls_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_neon_vqrshls_u32(__a, __b); } +__ai uint64_t vqrshld_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vqrshld_u64(__a, __b); } + +__ai int8_t vqshlb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_neon_vqshlb_s8(__a, __b); } +__ai int16_t vqshlh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_neon_vqshlh_s16(__a, __b); } +__ai int32_t vqshls_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_neon_vqshls_s32(__a, __b); } +__ai int64_t vqshld_s64(int64_t __a, int64_t __b) { 
+ return (int64_t)__builtin_neon_vqshld_s64(__a, __b); } +__ai uint8_t vqshlb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_neon_vqshlb_u8(__a, __b); } +__ai uint16_t vqshlh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_neon_vqshlh_u16(__a, __b); } +__ai uint32_t vqshls_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_neon_vqshls_u32(__a, __b); } +__ai uint64_t vqshld_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vqshld_u64(__a, __b); } + +__ai int8_t vqsubb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_neon_vqsubb_s8(__a, __b); } +__ai int16_t vqsubh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_neon_vqsubh_s16(__a, __b); } +__ai int32_t vqsubs_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_neon_vqsubs_s32(__a, __b); } +__ai int64_t vqsubd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vqsubd_s64(__a, __b); } +__ai uint8_t vqsubb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_neon_vqsubb_u8(__a, __b); } +__ai uint16_t vqsubh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_neon_vqsubh_u16(__a, __b); } +__ai uint32_t vqsubs_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_neon_vqsubs_u32(__a, __b); } +__ai uint64_t vqsubd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vqsubd_u64(__a, __b); } + +__ai int64_t vrshld_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vrshld_s64(__a, __b); } +__ai uint64_t vrshld_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vrshld_u64(__a, __b); } + +__ai float64_t vcvtd_f64_s64(int64_t __a) { + return (float64_t)__builtin_neon_vcvtd_f64_s64(__a); } + +__ai float32_t vcvts_f32_s32(int32_t __a) { + return (float32_t)__builtin_neon_vcvts_f32_s32(__a); } + +#define vcvts_n_f32_s32(a, __b) __extension__ ({ \ + int32_t __a = (a); \ + (float32_t)__builtin_neon_vcvts_n_f32_s32(__a, __b); }) +#define vcvts_n_f32_u32(a, __b) 
__extension__ ({ \ + uint32_t __a = (a); \ + (float32_t)__builtin_neon_vcvts_n_f32_u32(__a, __b); }) + +#define vcvtd_n_f64_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (float64_t)__builtin_neon_vcvtd_n_f64_s64(__a, __b); }) +#define vcvtd_n_f64_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (float64_t)__builtin_neon_vcvtd_n_f64_u64(__a, __b); }) + +__ai int64_t vshld_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vshld_s64(__a, __b); } +__ai uint64_t vshld_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vshld_u64(__a, __b); } + +#define vshld_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int64_t)__builtin_neon_vshld_n_s64(__a, __b); }) +#define vshld_n_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (uint64_t)__builtin_neon_vshld_n_u64(__a, __b); }) + +#define vslid_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int64_t)__builtin_neon_vslid_n_s64(__a, __b); }) +#define vslid_n_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (uint64_t)__builtin_neon_vslid_n_u64(__a, __b); }) + +__ai int8_t vqabsb_s8(int8_t __a) { + return (int8_t)__builtin_neon_vqabsb_s8(__a); } +__ai int16_t vqabsh_s16(int16_t __a) { + return (int16_t)__builtin_neon_vqabsh_s16(__a); } +__ai int32_t vqabss_s32(int32_t __a) { + return (int32_t)__builtin_neon_vqabss_s32(__a); } +__ai int64_t vqabsd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vqabsd_s64(__a); } + +__ai int32_t vqdmlalh_s16(int32_t __a, int16_t __b, int16_t __c) { + return (int32_t)__builtin_neon_vqdmlalh_s16(__a, __b, __c); } +__ai int64_t vqdmlals_s32(int64_t __a, int32_t __b, int32_t __c) { + return (int64_t)__builtin_neon_vqdmlals_s32(__a, __b, __c); } + +__ai int32_t vqdmlslh_s16(int32_t __a, int16_t __b, int16_t __c) { + return (int32_t)__builtin_neon_vqdmlslh_s16(__a, __b, __c); } +__ai int64_t vqdmlsls_s32(int64_t __a, int32_t __b, int32_t __c) { + return (int64_t)__builtin_neon_vqdmlsls_s32(__a, __b, __c); } + +__ai 
int16_t vqdmulhh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_neon_vqdmulhh_s16(__a, __b); } +__ai int32_t vqdmulhs_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_neon_vqdmulhs_s32(__a, __b); } + +__ai int32_t vqdmullh_s16(int16_t __a, int16_t __b) { + return (int32_t)__builtin_neon_vqdmullh_s16(__a, __b); } +__ai int64_t vqdmulls_s32(int32_t __a, int32_t __b) { + return (int64_t)__builtin_neon_vqdmulls_s32(__a, __b); } + +__ai int8_t vqnegb_s8(int8_t __a) { + return (int8_t)__builtin_neon_vqnegb_s8(__a); } +__ai int16_t vqnegh_s16(int16_t __a) { + return (int16_t)__builtin_neon_vqnegh_s16(__a); } +__ai int32_t vqnegs_s32(int32_t __a) { + return (int32_t)__builtin_neon_vqnegs_s32(__a); } +__ai int64_t vqnegd_s64(int64_t __a) { + return (int64_t)__builtin_neon_vqnegd_s64(__a); } + +__ai int16_t vqrdmulhh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_neon_vqrdmulhh_s16(__a, __b); } +__ai int32_t vqrdmulhs_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_neon_vqrdmulhs_s32(__a, __b); } + +#define vqrshrnh_n_s16(a, __b) __extension__ ({ \ + int16_t __a = (a); \ + (int8_t)__builtin_neon_vqrshrnh_n_s16(__a, __b); }) +#define vqrshrns_n_s32(a, __b) __extension__ ({ \ + int32_t __a = (a); \ + (int16_t)__builtin_neon_vqrshrns_n_s32(__a, __b); }) +#define vqrshrnd_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int32_t)__builtin_neon_vqrshrnd_n_s64(__a, __b); }) +#define vqrshrnh_n_u16(a, __b) __extension__ ({ \ + uint16_t __a = (a); \ + (uint8_t)__builtin_neon_vqrshrnh_n_u16(__a, __b); }) +#define vqrshrns_n_u32(a, __b) __extension__ ({ \ + uint32_t __a = (a); \ + (uint16_t)__builtin_neon_vqrshrns_n_u32(__a, __b); }) +#define vqrshrnd_n_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (uint32_t)__builtin_neon_vqrshrnd_n_u64(__a, __b); }) + +#define vqrshrunh_n_s16(a, __b) __extension__ ({ \ + int16_t __a = (a); \ + (int8_t)__builtin_neon_vqrshrunh_n_s16(__a, __b); }) +#define vqrshruns_n_s32(a, __b) 
__extension__ ({ \ + int32_t __a = (a); \ + (int16_t)__builtin_neon_vqrshruns_n_s32(__a, __b); }) +#define vqrshrund_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int32_t)__builtin_neon_vqrshrund_n_s64(__a, __b); }) + +#define vqshlub_n_s8(a, __b) __extension__ ({ \ + int8_t __a = (a); \ + (int8_t)__builtin_neon_vqshlub_n_s8(__a, __b); }) +#define vqshluh_n_s16(a, __b) __extension__ ({ \ + int16_t __a = (a); \ + (int16_t)__builtin_neon_vqshluh_n_s16(__a, __b); }) +#define vqshlus_n_s32(a, __b) __extension__ ({ \ + int32_t __a = (a); \ + (int32_t)__builtin_neon_vqshlus_n_s32(__a, __b); }) +#define vqshlud_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int64_t)__builtin_neon_vqshlud_n_s64(__a, __b); }) + +#define vqshlb_n_s8(a, __b) __extension__ ({ \ + int8_t __a = (a); \ + (int8_t)__builtin_neon_vqshlb_n_s8(__a, __b); }) +#define vqshlh_n_s16(a, __b) __extension__ ({ \ + int16_t __a = (a); \ + (int16_t)__builtin_neon_vqshlh_n_s16(__a, __b); }) +#define vqshls_n_s32(a, __b) __extension__ ({ \ + int32_t __a = (a); \ + (int32_t)__builtin_neon_vqshls_n_s32(__a, __b); }) +#define vqshld_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int64_t)__builtin_neon_vqshld_n_s64(__a, __b); }) +#define vqshlb_n_u8(a, __b) __extension__ ({ \ + uint8_t __a = (a); \ + (uint8_t)__builtin_neon_vqshlb_n_u8(__a, __b); }) +#define vqshlh_n_u16(a, __b) __extension__ ({ \ + uint16_t __a = (a); \ + (uint16_t)__builtin_neon_vqshlh_n_u16(__a, __b); }) +#define vqshls_n_u32(a, __b) __extension__ ({ \ + uint32_t __a = (a); \ + (uint32_t)__builtin_neon_vqshls_n_u32(__a, __b); }) +#define vqshld_n_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (uint64_t)__builtin_neon_vqshld_n_u64(__a, __b); }) + +#define vqshrnh_n_s16(a, __b) __extension__ ({ \ + int16_t __a = (a); \ + (int8_t)__builtin_neon_vqshrnh_n_s16(__a, __b); }) +#define vqshrns_n_s32(a, __b) __extension__ ({ \ + int32_t __a = (a); \ + (int16_t)__builtin_neon_vqshrns_n_s32(__a, __b); }) 
+#define vqshrnd_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int32_t)__builtin_neon_vqshrnd_n_s64(__a, __b); }) +#define vqshrnh_n_u16(a, __b) __extension__ ({ \ + uint16_t __a = (a); \ + (uint8_t)__builtin_neon_vqshrnh_n_u16(__a, __b); }) +#define vqshrns_n_u32(a, __b) __extension__ ({ \ + uint32_t __a = (a); \ + (uint16_t)__builtin_neon_vqshrns_n_u32(__a, __b); }) +#define vqshrnd_n_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (uint32_t)__builtin_neon_vqshrnd_n_u64(__a, __b); }) + +#define vqshrunh_n_s16(a, __b) __extension__ ({ \ + int16_t __a = (a); \ + (int8_t)__builtin_neon_vqshrunh_n_s16(__a, __b); }) +#define vqshruns_n_s32(a, __b) __extension__ ({ \ + int32_t __a = (a); \ + (int16_t)__builtin_neon_vqshruns_n_s32(__a, __b); }) +#define vqshrund_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int32_t)__builtin_neon_vqshrund_n_s64(__a, __b); }) + +__ai int8_t vqmovnh_s16(int16_t __a) { + return (int8_t)__builtin_neon_vqmovnh_s16(__a); } +__ai int16_t vqmovns_s32(int32_t __a) { + return (int16_t)__builtin_neon_vqmovns_s32(__a); } +__ai int32_t vqmovnd_s64(int64_t __a) { + return (int32_t)__builtin_neon_vqmovnd_s64(__a); } + +__ai int8_t vqmovunh_s16(int16_t __a) { + return (int8_t)__builtin_neon_vqmovunh_s16(__a); } +__ai int16_t vqmovuns_s32(int32_t __a) { + return (int16_t)__builtin_neon_vqmovuns_s32(__a); } +__ai int32_t vqmovund_s64(int64_t __a) { + return (int32_t)__builtin_neon_vqmovund_s64(__a); } + +#define vsrid_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int64_t)__builtin_neon_vsrid_n_s64(__a, __b); }) +#define vsrid_n_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (uint64_t)__builtin_neon_vsrid_n_u64(__a, __b); }) + +#define vrshrd_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int64_t)__builtin_neon_vrshrd_n_s64(__a, __b); }) +#define vrshrd_n_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (uint64_t)__builtin_neon_vrshrd_n_u64(__a, __b); }) + +#define 
vrsrad_n_s64(a, b, __c) __extension__ ({ \ + int64_t __a = (a); int64_t __b = (b); \ + (int64_t)__builtin_neon_vrsrad_n_s64(__a, __b, __c); }) +#define vrsrad_n_u64(a, b, __c) __extension__ ({ \ + uint64_t __a = (a); uint64_t __b = (b); \ + (uint64_t)__builtin_neon_vrsrad_n_u64(__a, __b, __c); }) + +#define vshrd_n_s64(a, __b) __extension__ ({ \ + int64_t __a = (a); \ + (int64_t)__builtin_neon_vshrd_n_s64(__a, __b); }) +#define vshrd_n_u64(a, __b) __extension__ ({ \ + uint64_t __a = (a); \ + (uint64_t)__builtin_neon_vshrd_n_u64(__a, __b); }) + +#define vsrad_n_s64(a, b, __c) __extension__ ({ \ + int64_t __a = (a); int64_t __b = (b); \ + (int64_t)__builtin_neon_vsrad_n_s64(__a, __b, __c); }) +#define vsrad_n_u64(a, b, __c) __extension__ ({ \ + uint64_t __a = (a); uint64_t __b = (b); \ + (uint64_t)__builtin_neon_vsrad_n_u64(__a, __b, __c); }) + +__ai int64_t vsubd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vsubd_s64(__a, __b); } +__ai uint64_t vsubd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vsubd_u64(__a, __b); } + +__ai int8_t vuqaddb_s8(int8_t __a, int8_t __b) { + return (int8_t)__builtin_neon_vuqaddb_s8(__a, __b); } +__ai int16_t vuqaddh_s16(int16_t __a, int16_t __b) { + return (int16_t)__builtin_neon_vuqaddh_s16(__a, __b); } +__ai int32_t vuqadds_s32(int32_t __a, int32_t __b) { + return (int32_t)__builtin_neon_vuqadds_s32(__a, __b); } +__ai int64_t vuqaddd_s64(int64_t __a, int64_t __b) { + return (int64_t)__builtin_neon_vuqaddd_s64(__a, __b); } + +__ai float64_t vcvtd_f64_u64(uint64_t __a) { + return (float64_t)__builtin_neon_vcvtd_f64_u64(__a); } + +__ai float32_t vcvts_f32_u32(uint32_t __a) { + return (float32_t)__builtin_neon_vcvts_f32_u32(__a); } + +__ai uint8_t vqmovnh_u16(uint16_t __a) { + return (uint8_t)__builtin_neon_vqmovnh_u16(__a); } +__ai uint16_t vqmovns_u32(uint32_t __a) { + return (uint16_t)__builtin_neon_vqmovns_u32(__a); } +__ai uint32_t vqmovnd_u64(uint64_t __a) { + return 
(uint32_t)__builtin_neon_vqmovnd_u64(__a); } + +__ai uint8_t vsqaddb_u8(uint8_t __a, uint8_t __b) { + return (uint8_t)__builtin_neon_vsqaddb_u8(__a, __b); } +__ai uint16_t vsqaddh_u16(uint16_t __a, uint16_t __b) { + return (uint16_t)__builtin_neon_vsqaddh_u16(__a, __b); } +__ai uint32_t vsqadds_u32(uint32_t __a, uint32_t __b) { + return (uint32_t)__builtin_neon_vsqadds_u32(__a, __b); } +__ai uint64_t vsqaddd_u64(uint64_t __a, uint64_t __b) { + return (uint64_t)__builtin_neon_vsqaddd_u64(__a, __b); } + +#define vset_lane_f64(a, b, __c) __extension__ ({ \ + float64_t __a = (a); float64x1_t __b = (b); \ + (float64x1_t)__builtin_neon_vset_lane_f64(__a, __b, __c); }) +#define vsetq_lane_f64(a, b, __c) __extension__ ({ \ + float64_t __a = (a); float64x2_t __b = (b); \ + (float64x2_t)__builtin_neon_vsetq_lane_f64(__a, __b, __c); }) + +__ai uint32x4_t vsha1cq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_neon_vsha1cq_u32((int32x4_t)__a, __b, (int32x4_t)__c); } + +__ai uint32_t vsha1h_u32(uint32_t __a) { + return (uint32_t)__builtin_neon_vsha1h_u32(__a); } + +__ai uint32x4_t vsha1mq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_neon_vsha1mq_u32((int32x4_t)__a, __b, (int32x4_t)__c); } + +__ai uint32x4_t vsha1pq_u32(uint32x4_t __a, uint32_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_neon_vsha1pq_u32((int32x4_t)__a, __b, (int32x4_t)__c); } + +__ai uint32x4_t vsha1su0q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_neon_vsha1su0q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai uint32x4_t vsha1su1q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vsha1su1q_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai uint32x4_t vsha256hq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_neon_vsha256hq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai uint32x4_t 
vsha256h2q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_neon_vsha256h2q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +__ai uint32x4_t vsha256su0q_u32(uint32x4_t __a, uint32x4_t __b) { + return (uint32x4_t)__builtin_neon_vsha256su0q_v((int8x16_t)__a, (int8x16_t)__b, 50); } + +__ai uint32x4_t vsha256su1q_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return (uint32x4_t)__builtin_neon_vsha256su1q_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); } + +#define vshll_high_n_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + int8x8_t __a1 = vget_high_s8(__a); \ + (int16x8_t)vshll_n_s8(__a1, __b); }) +#define vshll_high_n_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + int16x4_t __a1 = vget_high_s16(__a); \ + (int32x4_t)vshll_n_s16(__a1, __b); }) +#define vshll_high_n_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + int32x2_t __a1 = vget_high_s32(__a); \ + (int64x2_t)vshll_n_s32(__a1, __b); }) +#define vshll_high_n_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + uint8x8_t __a1 = vget_high_u8(__a); \ + (uint16x8_t)vshll_n_u8(__a1, __b); }) +#define vshll_high_n_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + uint16x4_t __a1 = vget_high_u16(__a); \ + (uint32x4_t)vshll_n_u16(__a1, __b); }) +#define vshll_high_n_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + uint32x2_t __a1 = vget_high_u32(__a); \ + (uint64x2_t)vshll_n_u32(__a1, __b); }) + +#define vshrn_high_n_s16(a, b, __c) __extension__ ({ \ + int8x8_t __a = (a); int16x8_t __b = (b); \ + (int8x16_t)vcombine_s16(__a, vshrn_n_s16(__b, __c)); }) +#define vshrn_high_n_s32(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int32x4_t __b = (b); \ + (int16x8_t)vcombine_s32(__a, vshrn_n_s32(__b, __c)); }) +#define vshrn_high_n_s64(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int64x2_t __b = (b); \ + (int32x4_t)vcombine_s64(__a, vshrn_n_s64(__b, __c)); }) +#define vshrn_high_n_u16(a, b, __c) 
__extension__ ({ \ + uint8x8_t __a = (a); uint16x8_t __b = (b); \ + (uint8x16_t)vcombine_u16(__a, vshrn_n_u16(__b, __c)); }) +#define vshrn_high_n_u32(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint32x4_t __b = (b); \ + (uint16x8_t)vcombine_u32(__a, vshrn_n_u32(__b, __c)); }) +#define vshrn_high_n_u64(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint64x2_t __b = (b); \ + (uint32x4_t)vcombine_u64(__a, vshrn_n_u64(__b, __c)); }) + +#define vst1q_f64(__a, b) __extension__ ({ \ + float64x2_t __b = (b); \ + __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 40); }) +#define vst1_f64(__a, b) __extension__ ({ \ + float64x1_t __b = (b); \ + __builtin_neon_vst1_v(__a, (int8x8_t)__b, 8); }) + +#define vst2q_u64(__a, b) __extension__ ({ \ + uint64x2x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 51); }) +#define vst2q_s64(__a, b) __extension__ ({ \ + int64x2x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 35); }) +#define vst2q_f64(__a, b) __extension__ ({ \ + float64x2x2_t __b = (b); \ + __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 40); }) +#define vst2_f64(__a, b) __extension__ ({ \ + float64x1x2_t __b = (b); \ + __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 8); }) + +#define vst3q_u64(__a, b) __extension__ ({ \ + uint64x2x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 51); }) +#define vst3q_s64(__a, b) __extension__ ({ \ + int64x2x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 35); }) +#define vst3q_f64(__a, b) __extension__ ({ \ + float64x2x3_t __b = (b); \ + __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 40); }) +#define vst3_f64(__a, b) __extension__ ({ \ + float64x1x3_t __b = (b); \ + __builtin_neon_vst3_v(__a, 
(int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 8); }) + +#define vst4q_u64(__a, b) __extension__ ({ \ + uint64x2x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 51); }) +#define vst4q_s64(__a, b) __extension__ ({ \ + int64x2x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 35); }) +#define vst4q_f64(__a, b) __extension__ ({ \ + float64x2x4_t __b = (b); \ + __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 40); }) +#define vst4_f64(__a, b) __extension__ ({ \ + float64x1x4_t __b = (b); \ + __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 8); }) + +__ai float64x2_t vsubq_f64(float64x2_t __a, float64x2_t __b) { + return __a - __b; } + +__ai int16x8_t vabal_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) { + return vabal_s8(__a, vget_high_s8(__b), vget_high_s8(__c)); } +__ai int32x4_t vabal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return vabal_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); } +__ai int64x2_t vabal_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return vabal_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); } +__ai uint16x8_t vabal_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) { + return vabal_u8(__a, vget_high_u8(__b), vget_high_u8(__c)); } +__ai uint32x4_t vabal_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { + return vabal_u16(__a, vget_high_u16(__b), vget_high_u16(__c)); } +__ai uint64x2_t vabal_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { + return vabal_u32(__a, vget_high_u32(__b), vget_high_u32(__c)); } + + +__ai int8x16_t vaddhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) { + return vcombine_s8(__a, vaddhn_s16(__b, __c)); } +__ai int16x8_t 
vaddhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) { + return vcombine_s16(__a, vaddhn_s32(__b, __c)); } +__ai int32x4_t vaddhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) { + return vcombine_s32(__a, vaddhn_s64(__b, __c)); } +__ai uint8x16_t vaddhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return vcombine_u8(__a, vaddhn_u16(__b, __c)); } +__ai uint16x8_t vaddhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return vcombine_u16(__a, vaddhn_u32(__b, __c)); } +__ai uint32x4_t vaddhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return vcombine_u32(__a, vaddhn_u64(__b, __c)); } + +__ai int16_t vaddlv_s8(int8x8_t __a) { + return (int16_t)__builtin_neon_vaddlv_s8(__a); } +__ai int32_t vaddlv_s16(int16x4_t __a) { + return (int32_t)__builtin_neon_vaddlv_s16(__a); } +__ai int64_t vaddlv_s32(int32x2_t __a) { + return (int64_t)__builtin_neon_vaddlv_s32(__a); } +__ai uint16_t vaddlv_u8(uint8x8_t __a) { + return (uint16_t)__builtin_neon_vaddlv_u8((int8x8_t)__a); } +__ai uint32_t vaddlv_u16(uint16x4_t __a) { + return (uint32_t)__builtin_neon_vaddlv_u16((int16x4_t)__a); } +__ai uint64_t vaddlv_u32(uint32x2_t __a) { + return (uint64_t)__builtin_neon_vaddlv_u32((int32x2_t)__a); } +__ai int16_t vaddlvq_s8(int8x16_t __a) { + return (int16_t)__builtin_neon_vaddlvq_s8(__a); } +__ai int32_t vaddlvq_s16(int16x8_t __a) { + return (int32_t)__builtin_neon_vaddlvq_s16(__a); } +__ai int64_t vaddlvq_s32(int32x4_t __a) { + return (int64_t)__builtin_neon_vaddlvq_s32(__a); } +__ai uint16_t vaddlvq_u8(uint8x16_t __a) { + return (uint16_t)__builtin_neon_vaddlvq_u8((int8x16_t)__a); } +__ai uint32_t vaddlvq_u16(uint16x8_t __a) { + return (uint32_t)__builtin_neon_vaddlvq_u16((int16x8_t)__a); } +__ai uint64_t vaddlvq_u32(uint32x4_t __a) { + return (uint64_t)__builtin_neon_vaddlvq_u32((int32x4_t)__a); } + +__ai int16x8_t vaddl_high_s8(int8x16_t __a, int8x16_t __b) { + return vmovl_high_s8(__a) + vmovl_high_s8(__b); } +__ai int32x4_t 
vaddl_high_s16(int16x8_t __a, int16x8_t __b) { + return vmovl_high_s16(__a) + vmovl_high_s16(__b); } +__ai int64x2_t vaddl_high_s32(int32x4_t __a, int32x4_t __b) { + return vmovl_high_s32(__a) + vmovl_high_s32(__b); } +__ai uint16x8_t vaddl_high_u8(uint8x16_t __a, uint8x16_t __b) { + return vmovl_high_u8(__a) + vmovl_high_u8(__b); } +__ai uint32x4_t vaddl_high_u16(uint16x8_t __a, uint16x8_t __b) { + return vmovl_high_u16(__a) + vmovl_high_u16(__b); } +__ai uint64x2_t vaddl_high_u32(uint32x4_t __a, uint32x4_t __b) { + return vmovl_high_u32(__a) + vmovl_high_u32(__b); } + +__ai int8_t vaddv_s8(int8x8_t __a) { + return (int8_t)__builtin_neon_vaddv_s8(__a); } +__ai int16_t vaddv_s16(int16x4_t __a) { + return (int16_t)__builtin_neon_vaddv_s16(__a); } +__ai int32_t vaddv_s32(int32x2_t __a) { + return (int32_t)__builtin_neon_vaddv_s32(__a); } +__ai uint8_t vaddv_u8(uint8x8_t __a) { + return (uint8_t)__builtin_neon_vaddv_u8((int8x8_t)__a); } +__ai uint16_t vaddv_u16(uint16x4_t __a) { + return (uint16_t)__builtin_neon_vaddv_u16((int16x4_t)__a); } +__ai uint32_t vaddv_u32(uint32x2_t __a) { + return (uint32_t)__builtin_neon_vaddv_u32((int32x2_t)__a); } +__ai int8_t vaddvq_s8(int8x16_t __a) { + return (int8_t)__builtin_neon_vaddvq_s8(__a); } +__ai int16_t vaddvq_s16(int16x8_t __a) { + return (int16_t)__builtin_neon_vaddvq_s16(__a); } +__ai int32_t vaddvq_s32(int32x4_t __a) { + return (int32_t)__builtin_neon_vaddvq_s32(__a); } +__ai uint8_t vaddvq_u8(uint8x16_t __a) { + return (uint8_t)__builtin_neon_vaddvq_u8((int8x16_t)__a); } +__ai uint16_t vaddvq_u16(uint16x8_t __a) { + return (uint16_t)__builtin_neon_vaddvq_u16((int16x8_t)__a); } +__ai uint32_t vaddvq_u32(uint32x4_t __a) { + return (uint32_t)__builtin_neon_vaddvq_u32((int32x4_t)__a); } + +__ai int16x8_t vaddw_high_s8(int16x8_t __a, int8x16_t __b) { + return __a + vmovl_high_s8(__b); } +__ai int32x4_t vaddw_high_s16(int32x4_t __a, int16x8_t __b) { + return __a + vmovl_high_s16(__b); } +__ai int64x2_t 
vaddw_high_s32(int64x2_t __a, int32x4_t __b) { + return __a + vmovl_high_s32(__b); } +__ai uint16x8_t vaddw_high_u8(uint16x8_t __a, uint8x16_t __b) { + return __a + vmovl_high_u8(__b); } +__ai uint32x4_t vaddw_high_u16(uint32x4_t __a, uint16x8_t __b) { + return __a + vmovl_high_u16(__b); } +__ai uint64x2_t vaddw_high_u32(uint64x2_t __a, uint32x4_t __b) { + return __a + vmovl_high_u32(__b); } + +#define vdup_lane_f16(a, __b) __extension__ ({ \ + float16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_lane_f64(a, __b) __extension__ ({ \ + float64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b); }) +#define vdupq_lane_f16(a, __b) __extension__ ({ \ + float16x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_lane_f64(a, __b) __extension__ ({ \ + float64x1_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) + +#define vdup_laneq_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_laneq_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_laneq_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdup_laneq_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b); }) +#define vdup_laneq_p8(a, __b) __extension__ ({ \ + poly8x16_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_laneq_p16(a, __b) __extension__ ({ \ + poly16x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_laneq_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdup_laneq_u16(a, __b) __extension__ ({ \ + uint16x8_t __a 
= (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_laneq_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdup_laneq_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b); }) +#define vdup_laneq_f16(a, __b) __extension__ ({ \ + float16x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdup_laneq_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdup_laneq_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b); }) +#define vdupq_laneq_s8(a, __b) __extension__ ({ \ + int8x16_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_laneq_s16(a, __b) __extension__ ({ \ + int16x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_laneq_s32(a, __b) __extension__ ({ \ + int32x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdupq_laneq_s64(a, __b) __extension__ ({ \ + int64x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdupq_laneq_p8(a, __b) __extension__ ({ \ + poly8x16_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_laneq_p16(a, __b) __extension__ ({ \ + poly16x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_laneq_u8(a, __b) __extension__ ({ \ + uint8x16_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_laneq_u16(a, __b) __extension__ ({ \ + uint16x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, 
__b, __b, __b, __b, __b, __b); }) +#define vdupq_laneq_u32(a, __b) __extension__ ({ \ + uint32x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdupq_laneq_u64(a, __b) __extension__ ({ \ + uint64x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) +#define vdupq_laneq_f16(a, __b) __extension__ ({ \ + float16x8_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) +#define vdupq_laneq_f32(a, __b) __extension__ ({ \ + float32x4_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) +#define vdupq_laneq_f64(a, __b) __extension__ ({ \ + float64x2_t __a = (a); \ + __builtin_shufflevector(__a, __a, __b, __b); }) + +#define vext_f64(a, b, __c) __extension__ ({ \ + float64x1_t __a = (a); float64x1_t __b = (b); \ + (float64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); }) +#define vextq_f64(a, b, __c) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); \ + (float64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 40); }) + +#define vfma_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ + (float32x2_t)__builtin_neon_vfma_lane_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, __d, 7); }) +#define vfma_lane_f64(a, b, c, __d) __extension__ ({ \ + float64x1_t __a = (a); float64x1_t __b = (b); float64x1_t __c = (c); \ + (float64x1_t)__builtin_neon_vfma_lane_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, __d, 8); }) +#define vfmaq_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \ + (float32x4_t)__builtin_neon_vfmaq_lane_v((int8x16_t)__a, (int8x16_t)__b, (int8x8_t)__c, __d, 39); }) +#define vfmaq_lane_f64(a, b, c, __d) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); float64x1_t __c = (c); \ + (float64x2_t)__builtin_neon_vfmaq_lane_v((int8x16_t)__a, (int8x16_t)__b, (int8x8_t)__c, 
__d, 40); }) + +#define vfma_laneq_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x4_t __c = (c); \ + (float32x2_t)__builtin_neon_vfma_laneq_v((int8x8_t)__a, (int8x8_t)__b, (int8x16_t)__c, __d, 7); }) +#define vfma_laneq_f64(a, b, c, __d) __extension__ ({ \ + float64x1_t __a = (a); float64x1_t __b = (b); float64x2_t __c = (c); \ + (float64x1_t)__builtin_neon_vfma_laneq_v((int8x8_t)__a, (int8x8_t)__b, (int8x16_t)__c, __d, 8); }) +#define vfmaq_laneq_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x4_t __c = (c); \ + (float32x4_t)__builtin_neon_vfmaq_laneq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, __d, 39); }) +#define vfmaq_laneq_f64(a, b, c, __d) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); float64x2_t __c = (c); \ + (float64x2_t)__builtin_neon_vfmaq_laneq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, __d, 40); }) + +#define vfms_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \ + float32x2_t __a1 = __a; \ + float32x2_t __b1 = __b; \ + float32x2_t __c1 = __c; \ + vfma_lane_f32(__a1, __b1, -__c1, __d); }) +#define vfms_lane_f64(a, b, c, __d) __extension__ ({ \ + float64x1_t __a = (a); float64x1_t __b = (b); float64x1_t __c = (c); \ + float64x1_t __a1 = __a; \ + float64x1_t __b1 = __b; \ + float64x1_t __c1 = __c; \ + vfma_lane_f64(__a1, __b1, -__c1, __d); }) +#define vfmsq_lane_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \ + float32x4_t __a1 = __a; \ + float32x4_t __b1 = __b; \ + float32x2_t __c1 = __c; \ + vfmaq_lane_f32(__a1, __b1, -__c1, __d); }) +#define vfmsq_lane_f64(a, b, c, __d) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); float64x1_t __c = (c); \ + float64x2_t __a1 = __a; \ + float64x2_t __b1 = __b; \ + float64x1_t __c1 = __c; \ + vfmaq_lane_f64(__a1, __b1, -__c1, __d); }) + +#define 
vfms_laneq_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x4_t __c = (c); \ + float32x2_t __a1 = __a; \ + float32x2_t __b1 = __b; \ + float32x4_t __c1 = __c; \ + vfma_laneq_f32(__a1, __b1, -__c1, __d); }) +#define vfms_laneq_f64(a, b, c, __d) __extension__ ({ \ + float64x1_t __a = (a); float64x1_t __b = (b); float64x2_t __c = (c); \ + float64x1_t __a1 = __a; \ + float64x1_t __b1 = __b; \ + float64x2_t __c1 = __c; \ + vfma_laneq_f64(__a1, __b1, -__c1, __d); }) +#define vfmsq_laneq_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x4_t __c = (c); \ + float32x4_t __a1 = __a; \ + float32x4_t __b1 = __b; \ + float32x4_t __c1 = __c; \ + vfmaq_laneq_f32(__a1, __b1, -__c1, __d); }) +#define vfmsq_laneq_f64(a, b, c, __d) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); float64x2_t __c = (c); \ + float64x2_t __a1 = __a; \ + float64x2_t __b1 = __b; \ + float64x2_t __c1 = __c; \ + vfmaq_laneq_f64(__a1, __b1, -__c1, __d); }) + +__ai int8_t vmaxv_s8(int8x8_t __a) { + return (int8_t)__builtin_neon_vmaxv_s8(__a); } +__ai int16_t vmaxv_s16(int16x4_t __a) { + return (int16_t)__builtin_neon_vmaxv_s16(__a); } +__ai int32_t vmaxv_s32(int32x2_t __a) { + return (int32_t)__builtin_neon_vmaxv_s32(__a); } +__ai uint8_t vmaxv_u8(uint8x8_t __a) { + return (uint8_t)__builtin_neon_vmaxv_u8((int8x8_t)__a); } +__ai uint16_t vmaxv_u16(uint16x4_t __a) { + return (uint16_t)__builtin_neon_vmaxv_u16((int16x4_t)__a); } +__ai uint32_t vmaxv_u32(uint32x2_t __a) { + return (uint32_t)__builtin_neon_vmaxv_u32((int32x2_t)__a); } +__ai int8_t vmaxvq_s8(int8x16_t __a) { + return (int8_t)__builtin_neon_vmaxvq_s8(__a); } +__ai int16_t vmaxvq_s16(int16x8_t __a) { + return (int16_t)__builtin_neon_vmaxvq_s16(__a); } +__ai int32_t vmaxvq_s32(int32x4_t __a) { + return (int32_t)__builtin_neon_vmaxvq_s32(__a); } +__ai uint8_t vmaxvq_u8(uint8x16_t __a) { + return (uint8_t)__builtin_neon_vmaxvq_u8((int8x16_t)__a); } 
+/* Tail of the vmaxvq_* wrappers: each passes __a to the identically named __builtin_neon_vmaxvq_* builtin (unsigned inputs are first cast to the signed vector type of the same shape) and casts the result to the declared scalar return type. */__ai uint16_t vmaxvq_u16(uint16x8_t __a) { + return (uint16_t)__builtin_neon_vmaxvq_u16((int16x8_t)__a); } +__ai uint32_t vmaxvq_u32(uint32x4_t __a) { + return (uint32_t)__builtin_neon_vmaxvq_u32((int32x4_t)__a); } +__ai float32_t vmaxvq_f32(float32x4_t __a) { + return (float32_t)__builtin_neon_vmaxvq_f32(__a); } +/* vminv_* and vminvq_*: same wrapper shape, forwarding __a to __builtin_neon_vminv_* / __builtin_neon_vminvq_* and casting the result to the scalar return type. NOTE(review): presumably the minimum over all lanes of __a -- confirm against ARM's intrinsics reference. */ +__ai int8_t vminv_s8(int8x8_t __a) { + return (int8_t)__builtin_neon_vminv_s8(__a); } +__ai int16_t vminv_s16(int16x4_t __a) { + return (int16_t)__builtin_neon_vminv_s16(__a); } +__ai int32_t vminv_s32(int32x2_t __a) { + return (int32_t)__builtin_neon_vminv_s32(__a); } +__ai uint8_t vminv_u8(uint8x8_t __a) { + return (uint8_t)__builtin_neon_vminv_u8((int8x8_t)__a); } +__ai uint16_t vminv_u16(uint16x4_t __a) { + return (uint16_t)__builtin_neon_vminv_u16((int16x4_t)__a); } +__ai uint32_t vminv_u32(uint32x2_t __a) { + return (uint32_t)__builtin_neon_vminv_u32((int32x2_t)__a); } +__ai int8_t vminvq_s8(int8x16_t __a) { + return (int8_t)__builtin_neon_vminvq_s8(__a); } +__ai int16_t vminvq_s16(int16x8_t __a) { + return (int16_t)__builtin_neon_vminvq_s16(__a); } +__ai int32_t vminvq_s32(int32x4_t __a) { + return (int32_t)__builtin_neon_vminvq_s32(__a); } +__ai uint8_t vminvq_u8(uint8x16_t __a) { + return (uint8_t)__builtin_neon_vminvq_u8((int8x16_t)__a); } +__ai uint16_t vminvq_u16(uint16x8_t __a) { + return (uint16_t)__builtin_neon_vminvq_u16((int16x8_t)__a); } +__ai uint32_t vminvq_u32(uint32x4_t __a) { + return (uint32_t)__builtin_neon_vminvq_u32((int32x4_t)__a); } +__ai float32_t vminvq_f32(float32x4_t __a) { + return (float32_t)__builtin_neon_vminvq_f32(__a); } +/* vmlal_high_*: call the matching vmlal_* on __a and on the vget_high_* halves of __b and __c. */ +__ai int16x8_t vmlal_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) { + return vmlal_s8(__a, vget_high_s8(__b), vget_high_s8(__c)); } +__ai int32x4_t vmlal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return vmlal_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); } +__ai int64x2_t vmlal_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return vmlal_s32(__a,
vget_high_s32(__b), vget_high_s32(__c)); } +__ai uint16x8_t vmlal_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) { + return vmlal_u8(__a, vget_high_u8(__b), vget_high_u8(__c)); } +__ai uint32x4_t vmlal_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { + return vmlal_u16(__a, vget_high_u16(__b), vget_high_u16(__c)); } +__ai uint64x2_t vmlal_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { + return vmlal_u32(__a, vget_high_u32(__b), vget_high_u32(__c)); } +/* vmlal_high_lane_*(a, b, c, __d): statement-expression macros. a, b and c are copied into typed locals and the value is __a + vmull_*(vget_high_*(__b), splat), where splat is __builtin_shufflevector(__c, __c, __d, ...) with __d repeated 4 times (16-bit variants) or 2 times (32-bit variants). __d is pasted into the builtin as written. */ +#define vmlal_high_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + __a + vmull_s16(vget_high_s16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_high_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + __a + vmull_s32(vget_high_s32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlal_high_lane_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ + __a + vmull_u16(vget_high_u16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_high_lane_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ + __a + vmull_u32(vget_high_u32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) +/* vmlal_high_laneq_*: same expression as vmlal_high_lane_*, but __c is declared as int16x8_t / int32x4_t / uint16x8_t / uint32x4_t instead of the 4- and 2-lane types. */ +#define vmlal_high_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + __a + vmull_s16(vget_high_s16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_high_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + __a + vmull_s32(vget_high_s32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlal_high_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x8_t __b = (b); uint16x8_t __c = (c); \ + __a + vmull_u16(vget_high_u16(__b),
__builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_high_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x4_t __b = (b); uint32x4_t __c = (c); \ + __a + vmull_u32(vget_high_u32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) +/* vmlal_laneq_*: __a + vmull_*(__b, splat of index __d of __c); __b is passed to vmull_* directly, without a vget_high_* step. */ +#define vmlal_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + __a + vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + __a + vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlal_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x8_t __c = (c); \ + __a + vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlal_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x4_t __c = (c); \ + __a + vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +/* vmla_laneq_* and vmlaq_laneq_*: __a + (__b * splat), written with the vector + and * operators; the splat repeats index __d of __c 2, 4 or 8 times, matching the lane count in __b's declared type. */ +#define vmla_laneq_s16(a, b, c, __d) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmla_laneq_s32(a, b, c, __d) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmla_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x8_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmla_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmla_laneq_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x4_t __c =
(c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlaq_laneq_s16(a, b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlaq_laneq_s32(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlaq_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x8_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlaq_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlaq_laneq_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x4_t __c = (c); \ + __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +/* vmlsl_high_*: call the matching vmlsl_* on __a and on the vget_high_* halves of __b and __c. */ +__ai int16x8_t vmlsl_high_s8(int16x8_t __a, int8x16_t __b, int8x16_t __c) { + return vmlsl_s8(__a, vget_high_s8(__b), vget_high_s8(__c)); } +__ai int32x4_t vmlsl_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return vmlsl_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); } +__ai int64x2_t vmlsl_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return vmlsl_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); } +__ai uint16x8_t vmlsl_high_u8(uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) { + return vmlsl_u8(__a, vget_high_u8(__b), vget_high_u8(__c)); } +__ai uint32x4_t vmlsl_high_u16(uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) { + return vmlsl_u16(__a, vget_high_u16(__b), vget_high_u16(__c)); } +__ai uint64x2_t vmlsl_high_u32(uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) { + return vmlsl_u32(__a, vget_high_u32(__b), vget_high_u32(__c)); } +
+#define vmlsl_high_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + __a - vmull_s16(vget_high_s16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_high_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + __a - vmull_s32(vget_high_s32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsl_high_lane_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \ + __a - vmull_u16(vget_high_u16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_high_lane_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \ + __a - vmull_u32(vget_high_u32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vmlsl_high_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + __a - vmull_s16(vget_high_s16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_high_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + __a - vmull_s32(vget_high_s32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsl_high_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x8_t __b = (b); uint16x8_t __c = (c); \ + __a - vmull_u16(vget_high_u16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_high_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x4_t __b = (b); uint32x4_t __c = (c); \ + __a - vmull_u32(vget_high_u32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vmlsl_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + __a - vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 
+#define vmlsl_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + __a - vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsl_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x8_t __c = (c); \ + __a - vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsl_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x4_t __c = (c); \ + __a - vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vmls_laneq_s16(a, b, c, __d) __extension__ ({ \ + int16x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmls_laneq_s32(a, b, c, __d) __extension__ ({ \ + int32x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmls_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x8_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmls_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmls_laneq_f32(a, b, c, __d) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); float32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); }) +#define vmlsq_laneq_s16(a, b, c, __d) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsq_laneq_s32(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 
+#define vmlsq_laneq_u16(a, b, c, __d) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x8_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); }) +#define vmlsq_laneq_u32(a, b, c, __d) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vmlsq_laneq_f32(a, b, c, __d) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); float32x4_t __c = (c); \ + __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) + + + +#define vmull_high_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + vmull_s16(vget_high_s16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_high_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + vmull_s32(vget_high_s32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmull_high_lane_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x4_t __b = (b); \ + vmull_u16(vget_high_u16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_high_lane_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x2_t __b = (b); \ + vmull_u32(vget_high_u32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vmull_high_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + vmull_s16(vget_high_s16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_high_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + vmull_s32(vget_high_s32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmull_high_laneq_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + vmull_u16(vget_high_u16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define 
vmull_high_laneq_u32(a, b, __c) __extension__ ({ \ + uint32x4_t __a = (a); uint32x4_t __b = (b); \ + vmull_u32(vget_high_u32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vmull_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + vmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmull_laneq_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x8_t __b = (b); \ + vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmull_laneq_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x4_t __b = (b); \ + vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vmulx_lane_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x2_t __b = (b); \ + vmulx_f32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmulx_lane_f64(a, b, __c) __extension__ ({ \ + float64x1_t __a = (a); float64x1_t __b = (b); \ + vmulx_f64(__a, __builtin_shufflevector(__b, __b, __c)); }) +#define vmulxq_lane_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x2_t __b = (b); \ + vmulxq_f32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmulxq_lane_f64(a, b, __c) __extension__ ({ \ + float64x2_t __a = (a); float64x1_t __b = (b); \ + vmulxq_f64(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vmulx_laneq_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x4_t __b = (b); \ + vmulx_f32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vmulx_laneq_f64(a, b, __c) __extension__ ({ \ + float64x1_t __a = (a); float64x2_t __b = (b); \ + vmulx_f64(__a, __builtin_shufflevector(__b, __b, __c)); }) +#define vmulxq_laneq_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t 
__b = (b); \ + vmulxq_f32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vmulxq_laneq_f64(a, b, __c) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); \ + vmulxq_f64(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vmul_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmul_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmul_laneq_f32(a, b, __c) __extension__ ({ \ + float32x2_t __a = (a); float32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmul_laneq_f64(a, b, __c) __extension__ ({ \ + float64x1_t __a = (a); float64x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c); }) +#define vmul_laneq_u16(a, b, __c) __extension__ ({ \ + uint16x4_t __a = (a); uint16x8_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmul_laneq_u32(a, b, __c) __extension__ ({ \ + uint32x2_t __a = (a); uint32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) +#define vmulq_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) +#define vmulq_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_laneq_f32(a, b, __c) __extension__ ({ \ + float32x4_t __a = (a); float32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_laneq_u16(a, b, __c) __extension__ ({ \ + uint16x8_t __a = (a); uint16x8_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); }) +#define vmulq_laneq_u32(a, b, __c) __extension__ ({ \ + 
uint32x4_t __a = (a); uint32x4_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) +#define vmulq_laneq_f64(a, b, __c) __extension__ ({ \ + float64x2_t __a = (a); float64x2_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) + +#define vmul_lane_f64(a, b, __c) __extension__ ({ \ + float64x1_t __a = (a); float64x1_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c); }) +#define vmulq_lane_f64(a, b, __c) __extension__ ({ \ + float64x2_t __a = (a); float64x1_t __b = (b); \ + __a * __builtin_shufflevector(__b, __b, __c, __c); }) + +__ai int32x4_t vqdmlal_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return vqdmlal_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); } +__ai int64x2_t vqdmlal_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return vqdmlal_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); } + +#define vqdmlal_high_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + vqdmlal_s16(__a, vget_high_s16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlal_high_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + vqdmlal_s32(__a, vget_high_s32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vqdmlal_high_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + vqdmlal_s16(__a, vget_high_s16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlal_high_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + vqdmlal_s32(__a, vget_high_s32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vqdmlal_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); 
}) +#define vqdmlal_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +__ai int32x4_t vqdmlsl_high_s16(int32x4_t __a, int16x8_t __b, int16x8_t __c) { + return vqdmlsl_s16(__a, vget_high_s16(__b), vget_high_s16(__c)); } +__ai int64x2_t vqdmlsl_high_s32(int64x2_t __a, int32x4_t __b, int32x4_t __c) { + return vqdmlsl_s32(__a, vget_high_s32(__b), vget_high_s32(__c)); } + +#define vqdmlsl_high_lane_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \ + vqdmlsl_s16(__a, vget_high_s16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlsl_high_lane_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \ + vqdmlsl_s32(__a, vget_high_s32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vqdmlsl_high_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x8_t __b = (b); int16x8_t __c = (c); \ + vqdmlsl_s16(__a, vget_high_s16(__b), __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlsl_high_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x4_t __b = (b); int32x4_t __c = (c); \ + vqdmlsl_s32(__a, vget_high_s32(__b), __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vqdmlsl_laneq_s16(a, b, c, __d) __extension__ ({ \ + int32x4_t __a = (a); int16x4_t __b = (b); int16x8_t __c = (c); \ + vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) +#define vqdmlsl_laneq_s32(a, b, c, __d) __extension__ ({ \ + int64x2_t __a = (a); int32x2_t __b = (b); int32x4_t __c = (c); \ + vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) + +#define vqdmulh_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 
+#define vqdmulh_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vqdmulhq_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) +#define vqdmulhq_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) + +__ai int32x4_t vqdmull_high_s16(int16x8_t __a, int16x8_t __b) { + return vqdmull_s16(vget_high_s16(__a), vget_high_s16(__b)); } +__ai int64x2_t vqdmull_high_s32(int32x4_t __a, int32x4_t __b) { + return vqdmull_s32(vget_high_s32(__a), vget_high_s32(__b)); } + +#define vqdmull_high_lane_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x4_t __b = (b); \ + vqdmull_s16(vget_high_s16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmull_high_lane_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x2_t __b = (b); \ + vqdmull_s32(vget_high_s32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vqdmull_high_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + vqdmull_s16(vget_high_s16(__a), __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmull_high_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + vqdmull_s32(vget_high_s32(__a), __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vqdmull_laneq_s16(a, b, __c) __extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqdmull_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) + +#define vqrdmulh_laneq_s16(a, b, __c) 
__extension__ ({ \ + int16x4_t __a = (a); int16x8_t __b = (b); \ + vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) +#define vqrdmulh_laneq_s32(a, b, __c) __extension__ ({ \ + int32x2_t __a = (a); int32x4_t __b = (b); \ + vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) +#define vqrdmulhq_laneq_s16(a, b, __c) __extension__ ({ \ + int16x8_t __a = (a); int16x8_t __b = (b); \ + vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) +#define vqrdmulhq_laneq_s32(a, b, __c) __extension__ ({ \ + int32x4_t __a = (a); int32x4_t __b = (b); \ + vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) + +__ai int8x16_t vraddhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) { + return vcombine_s8(__a, vraddhn_s16(__b, __c)); } +__ai int16x8_t vraddhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) { + return vcombine_s16(__a, vraddhn_s32(__b, __c)); } +__ai int32x4_t vraddhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) { + return vcombine_s32(__a, vraddhn_s64(__b, __c)); } +__ai uint8x16_t vraddhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return vcombine_u8(__a, vraddhn_u16(__b, __c)); } +__ai uint16x8_t vraddhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return vcombine_u16(__a, vraddhn_u32(__b, __c)); } +__ai uint32x4_t vraddhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return vcombine_u32(__a, vraddhn_u64(__b, __c)); } + +__ai int8x16_t vrsubhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) { + return vcombine_s8(__a, vrsubhn_s16(__b, __c)); } +__ai int16x8_t vrsubhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) { + return vcombine_s16(__a, vrsubhn_s32(__b, __c)); } +__ai int32x4_t vrsubhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) { + return vcombine_s32(__a, vrsubhn_s64(__b, __c)); } +__ai uint8x16_t vrsubhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { + 
return vcombine_u8(__a, vrsubhn_u16(__b, __c)); } +__ai uint16x8_t vrsubhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return vcombine_u16(__a, vrsubhn_u32(__b, __c)); } +__ai uint32x4_t vrsubhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return vcombine_u32(__a, vrsubhn_u64(__b, __c)); } + +__ai int8x16_t vsubhn_high_s16(int8x8_t __a, int16x8_t __b, int16x8_t __c) { + return vcombine_s8(__a, vsubhn_s16(__b, __c)); } +__ai int16x8_t vsubhn_high_s32(int16x4_t __a, int32x4_t __b, int32x4_t __c) { + return vcombine_s16(__a, vsubhn_s32(__b, __c)); } +__ai int32x4_t vsubhn_high_s64(int32x2_t __a, int64x2_t __b, int64x2_t __c) { + return vcombine_s32(__a, vsubhn_s64(__b, __c)); } +__ai uint8x16_t vsubhn_high_u16(uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) { + return vcombine_u8(__a, vsubhn_u16(__b, __c)); } +__ai uint16x8_t vsubhn_high_u32(uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) { + return vcombine_u16(__a, vsubhn_u32(__b, __c)); } +__ai uint32x4_t vsubhn_high_u64(uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) { + return vcombine_u32(__a, vsubhn_u64(__b, __c)); } + +__ai int16x8_t vsubl_high_s8(int8x16_t __a, int8x16_t __b) { + return vmovl_high_s8(__a) - vmovl_high_s8(__b); } +__ai int32x4_t vsubl_high_s16(int16x8_t __a, int16x8_t __b) { + return vmovl_high_s16(__a) - vmovl_high_s16(__b); } +__ai int64x2_t vsubl_high_s32(int32x4_t __a, int32x4_t __b) { + return vmovl_high_s32(__a) - vmovl_high_s32(__b); } +__ai uint16x8_t vsubl_high_u8(uint8x16_t __a, uint8x16_t __b) { + return vmovl_high_u8(__a) - vmovl_high_u8(__b); } +__ai uint32x4_t vsubl_high_u16(uint16x8_t __a, uint16x8_t __b) { + return vmovl_high_u16(__a) - vmovl_high_u16(__b); } +__ai uint64x2_t vsubl_high_u32(uint32x4_t __a, uint32x4_t __b) { + return vmovl_high_u32(__a) - vmovl_high_u32(__b); } + +__ai int16x8_t vsubw_high_s8(int16x8_t __a, int8x16_t __b) { + return __a - vmovl_high_s8(__b); } +__ai int32x4_t vsubw_high_s16(int32x4_t __a, int16x8_t __b) { + return 
__a - vmovl_high_s16(__b); } +__ai int64x2_t vsubw_high_s32(int64x2_t __a, int32x4_t __b) { + return __a - vmovl_high_s32(__b); } +__ai uint16x8_t vsubw_high_u8(uint16x8_t __a, uint8x16_t __b) { + return __a - vmovl_high_u8(__b); } +__ai uint32x4_t vsubw_high_u16(uint32x4_t __a, uint16x8_t __b) { + return __a - vmovl_high_u16(__b); } +__ai uint64x2_t vsubw_high_u32(uint64x2_t __a, uint32x4_t __b) { + return __a - vmovl_high_u32(__b); } + +__ai int8x8_t vtrn1_s8(int8x8_t __a, int8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); } +__ai int16x4_t vtrn1_s16(int16x4_t __a, int16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); } +__ai int32x2_t vtrn1_s32(int32x2_t __a, int32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai uint8x8_t vtrn1_u8(uint8x8_t __a, uint8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); } +__ai uint16x4_t vtrn1_u16(uint16x4_t __a, uint16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); } +__ai uint32x2_t vtrn1_u32(uint32x2_t __a, uint32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai float32x2_t vtrn1_f32(float32x2_t __a, float32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai poly8x8_t vtrn1_p8(poly8x8_t __a, poly8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); } +__ai poly16x4_t vtrn1_p16(poly16x4_t __a, poly16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); } +__ai int8x16_t vtrn1q_s8(int8x16_t __a, int8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); } +__ai int16x8_t vtrn1q_s16(int16x8_t __a, int16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); } +__ai int32x4_t vtrn1q_s32(int32x4_t __a, int32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); } +__ai int64x2_t vtrn1q_s64(int64x2_t __a, int64x2_t __b) { + 
return __builtin_shufflevector(__a, __b, 0, 2); } +__ai uint8x16_t vtrn1q_u8(uint8x16_t __a, uint8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); } +__ai uint16x8_t vtrn1q_u16(uint16x8_t __a, uint16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); } +__ai uint32x4_t vtrn1q_u32(uint32x4_t __a, uint32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); } +__ai uint64x2_t vtrn1q_u64(uint64x2_t __a, uint64x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai float32x4_t vtrn1q_f32(float32x4_t __a, float32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 2, 6); } +__ai float64x2_t vtrn1q_f64(float64x2_t __a, float64x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai poly8x16_t vtrn1q_p8(poly8x16_t __a, poly8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30); } +__ai poly16x8_t vtrn1q_p16(poly16x8_t __a, poly16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 2, 10, 4, 12, 6, 14); } + +__ai int8x8_t vtrn2_s8(int8x8_t __a, int8x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); } +__ai int16x4_t vtrn2_s16(int16x4_t __a, int16x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); } +__ai int32x2_t vtrn2_s32(int32x2_t __a, int32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai uint8x8_t vtrn2_u8(uint8x8_t __a, uint8x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); } +__ai uint16x4_t vtrn2_u16(uint16x4_t __a, uint16x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); } +__ai uint32x2_t vtrn2_u32(uint32x2_t __a, uint32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai float32x2_t vtrn2_f32(float32x2_t __a, float32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai poly8x8_t vtrn2_p8(poly8x8_t __a, 
poly8x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); } +__ai poly16x4_t vtrn2_p16(poly16x4_t __a, poly16x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); } +__ai int8x16_t vtrn2q_s8(int8x16_t __a, int8x16_t __b) { + return __builtin_shufflevector(__a, __b, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); } +__ai int16x8_t vtrn2q_s16(int16x8_t __a, int16x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); } +__ai int32x4_t vtrn2q_s32(int32x4_t __a, int32x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); } +__ai int64x2_t vtrn2q_s64(int64x2_t __a, int64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai uint8x16_t vtrn2q_u8(uint8x16_t __a, uint8x16_t __b) { + return __builtin_shufflevector(__a, __b, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); } +__ai uint16x8_t vtrn2q_u16(uint16x8_t __a, uint16x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); } +__ai uint32x4_t vtrn2q_u32(uint32x4_t __a, uint32x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); } +__ai uint64x2_t vtrn2q_u64(uint64x2_t __a, uint64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai float32x4_t vtrn2q_f32(float32x4_t __a, float32x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 5, 3, 7); } +__ai float64x2_t vtrn2q_f64(float64x2_t __a, float64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai poly8x16_t vtrn2q_p8(poly8x16_t __a, poly8x16_t __b) { + return __builtin_shufflevector(__a, __b, 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31); } +__ai poly16x8_t vtrn2q_p16(poly16x8_t __a, poly16x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 9, 3, 11, 5, 13, 7, 15); } + +__ai int8x8_t vuzp1_s8(int8x8_t __a, int8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); } +__ai int16x4_t vuzp1_s16(int16x4_t __a, int16x4_t __b) { + 
return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); } +__ai int32x2_t vuzp1_s32(int32x2_t __a, int32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai uint8x8_t vuzp1_u8(uint8x8_t __a, uint8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); } +__ai uint16x4_t vuzp1_u16(uint16x4_t __a, uint16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); } +__ai uint32x2_t vuzp1_u32(uint32x2_t __a, uint32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai float32x2_t vuzp1_f32(float32x2_t __a, float32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai poly8x8_t vuzp1_p8(poly8x8_t __a, poly8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); } +__ai poly16x4_t vuzp1_p16(poly16x4_t __a, poly16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); } +__ai int8x16_t vuzp1q_s8(int8x16_t __a, int8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); } +__ai int16x8_t vuzp1q_s16(int16x8_t __a, int16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); } +__ai int32x4_t vuzp1q_s32(int32x4_t __a, int32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); } +__ai int64x2_t vuzp1q_s64(int64x2_t __a, int64x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai uint8x16_t vuzp1q_u8(uint8x16_t __a, uint8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); } +__ai uint16x8_t vuzp1q_u16(uint16x8_t __a, uint16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); } +__ai uint32x4_t vuzp1q_u32(uint32x4_t __a, uint32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); } +__ai uint64x2_t vuzp1q_u64(uint64x2_t __a, uint64x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai float32x4_t vuzp1q_f32(float32x4_t __a, 
float32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6); } +__ai float64x2_t vuzp1q_f64(float64x2_t __a, float64x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai poly8x16_t vuzp1q_p8(poly8x16_t __a, poly8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); } +__ai poly16x8_t vuzp1q_p16(poly16x8_t __a, poly16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14); } + +__ai int8x8_t vuzp2_s8(int8x8_t __a, int8x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); } +__ai int16x4_t vuzp2_s16(int16x4_t __a, int16x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); } +__ai int32x2_t vuzp2_s32(int32x2_t __a, int32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai uint8x8_t vuzp2_u8(uint8x8_t __a, uint8x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); } +__ai uint16x4_t vuzp2_u16(uint16x4_t __a, uint16x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); } +__ai uint32x2_t vuzp2_u32(uint32x2_t __a, uint32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai float32x2_t vuzp2_f32(float32x2_t __a, float32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai poly8x8_t vuzp2_p8(poly8x8_t __a, poly8x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); } +__ai poly16x4_t vuzp2_p16(poly16x4_t __a, poly16x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); } +__ai int8x16_t vuzp2q_s8(int8x16_t __a, int8x16_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); } +__ai int16x8_t vuzp2q_s16(int16x8_t __a, int16x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); } +__ai int32x4_t vuzp2q_s32(int32x4_t __a, int32x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); } +__ai 
int64x2_t vuzp2q_s64(int64x2_t __a, int64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai uint8x16_t vuzp2q_u8(uint8x16_t __a, uint8x16_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); } +__ai uint16x8_t vuzp2q_u16(uint16x8_t __a, uint16x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); } +__ai uint32x4_t vuzp2q_u32(uint32x4_t __a, uint32x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); } +__ai uint64x2_t vuzp2q_u64(uint64x2_t __a, uint64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai float32x4_t vuzp2q_f32(float32x4_t __a, float32x4_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7); } +__ai float64x2_t vuzp2q_f64(float64x2_t __a, float64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai poly8x16_t vuzp2q_p8(poly8x16_t __a, poly8x16_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); } +__ai poly16x8_t vuzp2q_p16(poly16x8_t __a, poly16x8_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15); } + +__ai int8x8_t vzip1_s8(int8x8_t __a, int8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); } +__ai int16x4_t vzip1_s16(int16x4_t __a, int16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); } +__ai int32x2_t vzip1_s32(int32x2_t __a, int32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai uint8x8_t vzip1_u8(uint8x8_t __a, uint8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); } +__ai uint16x4_t vzip1_u16(uint16x4_t __a, uint16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); } +__ai uint32x2_t vzip1_u32(uint32x2_t __a, uint32x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai float32x2_t vzip1_f32(float32x2_t __a, float32x2_t __b) { + return __builtin_shufflevector(__a, 
__b, 0, 2); } +__ai poly8x8_t vzip1_p8(poly8x8_t __a, poly8x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); } +__ai poly16x4_t vzip1_p16(poly16x4_t __a, poly16x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); } +__ai int8x16_t vzip1q_s8(int8x16_t __a, int8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); } +__ai int16x8_t vzip1q_s16(int16x8_t __a, int16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); } +__ai int32x4_t vzip1q_s32(int32x4_t __a, int32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); } +__ai int64x2_t vzip1q_s64(int64x2_t __a, int64x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai uint8x16_t vzip1q_u8(uint8x16_t __a, uint8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); } +__ai uint16x8_t vzip1q_u16(uint16x8_t __a, uint16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); } +__ai uint32x4_t vzip1q_u32(uint32x4_t __a, uint32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); } +__ai uint64x2_t vzip1q_u64(uint64x2_t __a, uint64x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai float32x4_t vzip1q_f32(float32x4_t __a, float32x4_t __b) { + return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); } +__ai float64x2_t vzip1q_f64(float64x2_t __a, float64x2_t __b) { + return __builtin_shufflevector(__a, __b, 0, 2); } +__ai poly8x16_t vzip1q_p8(poly8x16_t __a, poly8x16_t __b) { + return __builtin_shufflevector(__a, __b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); } +__ai poly16x8_t vzip1q_p16(poly16x8_t __a, poly16x8_t __b) { + return __builtin_shufflevector(__a, __b, 0, 8, 1, 9, 2, 10, 3, 11); } + +__ai int8x8_t vzip2_s8(int8x8_t __a, int8x8_t __b) { + return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); } +__ai int16x4_t 
vzip2_s16(int16x4_t __a, int16x4_t __b) { + return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); } +__ai int32x2_t vzip2_s32(int32x2_t __a, int32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai uint8x8_t vzip2_u8(uint8x8_t __a, uint8x8_t __b) { + return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); } +__ai uint16x4_t vzip2_u16(uint16x4_t __a, uint16x4_t __b) { + return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); } +__ai uint32x2_t vzip2_u32(uint32x2_t __a, uint32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai float32x2_t vzip2_f32(float32x2_t __a, float32x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai poly8x8_t vzip2_p8(poly8x8_t __a, poly8x8_t __b) { + return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); } +__ai poly16x4_t vzip2_p16(poly16x4_t __a, poly16x4_t __b) { + return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); } +__ai int8x16_t vzip2q_s8(int8x16_t __a, int8x16_t __b) { + return __builtin_shufflevector(__a, __b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); } +__ai int16x8_t vzip2q_s16(int16x8_t __a, int16x8_t __b) { + return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); } +__ai int32x4_t vzip2q_s32(int32x4_t __a, int32x4_t __b) { + return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); } +__ai int64x2_t vzip2q_s64(int64x2_t __a, int64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai uint8x16_t vzip2q_u8(uint8x16_t __a, uint8x16_t __b) { + return __builtin_shufflevector(__a, __b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); } +__ai uint16x8_t vzip2q_u16(uint16x8_t __a, uint16x8_t __b) { + return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); } +__ai uint32x4_t vzip2q_u32(uint32x4_t __a, uint32x4_t __b) { + return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); } +__ai uint64x2_t vzip2q_u64(uint64x2_t __a, uint64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 
3); } +__ai float32x4_t vzip2q_f32(float32x4_t __a, float32x4_t __b) { + return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); } +__ai float64x2_t vzip2q_f64(float64x2_t __a, float64x2_t __b) { + return __builtin_shufflevector(__a, __b, 1, 3); } +__ai poly8x16_t vzip2q_p8(poly8x16_t __a, poly8x16_t __b) { + return __builtin_shufflevector(__a, __b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); } +__ai poly16x8_t vzip2q_p16(poly16x8_t __a, poly16x8_t __b) { + return __builtin_shufflevector(__a, __b, 4, 12, 5, 13, 6, 14, 7, 15); } + +#endif + +#undef __ai + +#endif /* not __arm64 */ + +#endif /* __ARM_NEON_H */ diff --git a/python/clang/5.1/include/avx2intrin.h b/python/clang/5.1/include/avx2intrin.h new file mode 100644 index 00000000..95744693 --- /dev/null +++ b/python/clang/5.1/include/avx2intrin.h @@ -0,0 +1,1206 @@ +/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX2INTRIN_H +#define __AVX2INTRIN_H + +/* SSE4 Multiple Packed Sums of Absolute Difference. */ +#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_abs_epi8(__m256i __a) +{ + return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_abs_epi16(__m256i __a) +{ + return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_abs_epi32(__m256i __a) +{ + return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_packs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_packs_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_packus_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_packus_epi32(__m256i __V1, __m256i __V2) +{ + return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); +} + +static __inline__ __m256i
__attribute__((__always_inline__, __nodebug__)) +_mm256_add_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)((__v32qi)__a + (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_add_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a + (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_add_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a + (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_add_epi64(__m256i __a, __m256i __b) +{ + return __a + __b; +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_adds_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_adds_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_adds_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_adds_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b); +} + +#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \ + __m256i __a = (a); \ + __m256i __b = (b); \ + (__m256i)__builtin_ia32_palignr256((__v32qi)__a, (__v32qi)__b, (n)); }) + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_and_si256(__m256i __a, __m256i __b) +{ + return __a & __b; +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_andnot_si256(__m256i __a, __m256i __b) +{ + return ~__a & __b; +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
+_mm256_avg_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_avg_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) +{ + return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, + (__v32qi)__M); +} + +#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m256i __V2 = (V2); \ + (__m256i)__builtin_ia32_pblendw256((__v16hi)__V1, (__v16hi)__V2, (M)); }) + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)((__v32qi)__a == (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a == (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a == (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpeq_epi64(__m256i __a, __m256i __b) +{ + return (__m256i)(__a == __b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)((__v32qi)__a > (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a > (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a > (__v8si)__b); +} + +static __inline__ __m256i 
__attribute__((__always_inline__, __nodebug__)) +_mm256_cmpgt_epi64(__m256i __a, __m256i __b) +{ + return (__m256i)(__a > __b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_hadd_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_hadd_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_hadds_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_hsub_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_hsub_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_hsubs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_maddubs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_madd_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_max_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, 
__nodebug__)) +_mm256_max_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_max_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_max_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_max_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_max_epu32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_min_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_min_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_min_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_min_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_min_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
+_mm256_min_epu32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm256_movemask_epi8(__m256i __a) +{ + return __builtin_ia32_pmovmskb256((__v32qi)__a); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi8_epi16(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxbw256((__v16qi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi8_epi32(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxbd256((__v16qi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi8_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxbq256((__v16qi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi16_epi32(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxwd256((__v8hi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi16_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxwq256((__v8hi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi32_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovsxdq256((__v4si)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepu8_epi16(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxbw256((__v16qi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepu8_epi32(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxbd256((__v16qi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepu8_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxbq256((__v16qi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepu16_epi32(__m128i __V) 
+{ + return (__m256i)__builtin_ia32_pmovzxwd256((__v8hi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepu16_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxwq256((__v8hi)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepu32_epi64(__m128i __V) +{ + return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_mul_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_mulhrs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_mulhi_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_mulhi_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_mullo_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a * (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_mullo_epi32 (__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a * (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_mul_epu32(__m256i __a, __m256i __b) +{ + return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_or_si256(__m256i __a, __m256i __b) +{ + return __a | __b; +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
+_mm256_sad_epu8(__m256i __a, __m256i __b) +{ + return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_shuffle_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); +} + +#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)_mm256_set1_epi32(0), \ + (imm) & 0x3, ((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + 4 + (((imm) & 0x03) >> 0), \ + 4 + (((imm) & 0x0c) >> 2), \ + 4 + (((imm) & 0x30) >> 4), \ + 4 + (((imm) & 0xc0) >> 6)); }) + +#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ + 0, 1, 2, 3, \ + 4 + (((imm) & 0x03) >> 0), \ + 4 + (((imm) & 0x0c) >> 2), \ + 4 + (((imm) & 0x30) >> 4), \ + 4 + (((imm) & 0xc0) >> 6), \ + 8, 9, 10, 11, \ + 12 + (((imm) & 0x03) >> 0), \ + 12 + (((imm) & 0x0c) >> 2), \ + 12 + (((imm) & 0x30) >> 4), \ + 12 + (((imm) & 0xc0) >> 6)); }) + +#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)_mm256_set1_epi16(0), \ + (imm) & 0x3,((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + 4, 5, 6, 7, \ + 8 + (((imm) & 0x03) >> 0), \ + 8 + (((imm) & 0x0c) >> 2), \ + 8 + (((imm) & 0x30) >> 4), \ + 8 + (((imm) & 0xc0) >> 6), \ + 12, 13, 14, 15); }) + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sign_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sign_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i 
__attribute__((__always_inline__, __nodebug__)) +_mm256_sign_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); +} + +#define _mm256_slli_si256(a, count) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); }) + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_slli_epi16(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sll_epi16(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_slli_epi32(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sll_epi32(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_slli_epi64(__m256i __a, int __count) +{ + return __builtin_ia32_psllqi256(__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sll_epi64(__m256i __a, __m128i __count) +{ + return __builtin_ia32_psllq256(__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srai_epi16(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sra_epi16(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srai_epi32(__m256i __a, int __count) +{ + return 
(__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sra_epi32(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); +} + +#define _mm256_srli_si256(a, count) __extension__ ({ \ + __m256i __a = (a); \ + (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); }) + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srli_epi16(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srl_epi16(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srli_epi32(__m256i __a, int __count) +{ + return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srl_epi32(__m256i __a, __m128i __count) +{ + return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srli_epi64(__m256i __a, int __count) +{ + return __builtin_ia32_psrlqi256(__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srl_epi64(__m256i __a, __m128i __count) +{ + return __builtin_ia32_psrlq256(__a, __count); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sub_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)((__v32qi)__a - (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sub_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)((__v16hi)__a - (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
+_mm256_sub_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)((__v8si)__a - (__v8si)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sub_epi64(__m256i __a, __m256i __b) +{ + return __a - __b; +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_subs_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_subs_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_subs_epu8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_subs_epu16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_unpackhi_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_unpackhi_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_unpackhi_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); +} + +static __inline__ __m256i __attribute__((__always_inline__, 
__nodebug__)) +_mm256_unpackhi_epi64(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector(__a, __b, 1, 4+1, 3, 4+3); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_unpacklo_epi8(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_unpacklo_epi16(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_unpacklo_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_unpacklo_epi64(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_shufflevector(__a, __b, 0, 4+0, 2, 4+2); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_xor_si256(__m256i __a, __m256i __b) +{ + return __a ^ __b; +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_stream_load_si256(__m256i *__V) +{ + return (__m256i)__builtin_ia32_movntdqa256((__v4di *)__V); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_broadcastss_ps(__m128 __X) +{ + return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_broadcastss_ps(__m128 __X) +{ + return (__m256)__builtin_ia32_vbroadcastss_ps256((__v4sf)__X); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 
+_mm256_broadcastsd_pd(__m128d __X) +{ + return (__m256d)__builtin_ia32_vbroadcastsd_pd256((__v2df)__X); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_broadcastsi128_si256(__m128i __X) +{ + return (__m256i)__builtin_ia32_vbroadcastsi256(__X); +} + +#define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ + __m128i __V1 = (V1); \ + __m128i __V2 = (V2); \ + (__m128i)__builtin_ia32_pblendd128((__v4si)__V1, (__v4si)__V2, (M)); }) + +#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m256i __V2 = (V2); \ + (__m256i)__builtin_ia32_pblendd256((__v8si)__V1, (__v8si)__V2, (M)); }) + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_broadcastb_epi8(__m128i __X) +{ + return (__m256i)__builtin_ia32_pbroadcastb256((__v16qi)__X); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_broadcastw_epi16(__m128i __X) +{ + return (__m256i)__builtin_ia32_pbroadcastw256((__v8hi)__X); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_broadcastd_epi32(__m128i __X) +{ + return (__m256i)__builtin_ia32_pbroadcastd256((__v4si)__X); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_broadcastq_epi64(__m128i __X) +{ + return (__m256i)__builtin_ia32_pbroadcastq256(__X); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_broadcastb_epi8(__m128i __X) +{ + return (__m128i)__builtin_ia32_pbroadcastb128((__v16qi)__X); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_broadcastw_epi16(__m128i __X) +{ + return (__m128i)__builtin_ia32_pbroadcastw128((__v8hi)__X); +} + + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_broadcastd_epi32(__m128i __X) +{ + return (__m128i)__builtin_ia32_pbroadcastd128((__v4si)__X); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 
+_mm_broadcastq_epi64(__m128i __X) +{ + return (__m128i)__builtin_ia32_pbroadcastq128(__X); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) +{ + return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); +} + +#define _mm256_permute4x64_pd(V, M) __extension__ ({ \ + __m256d __V = (V); \ + (__m256d)__builtin_shufflevector((__v4df)__V, (__v4df) _mm256_setzero_pd(), \ + (M) & 0x3, ((M) & 0xc) >> 2, \ + ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_permutevar8x32_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8sf)__b); +} + +#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \ + __m256i __V = (V); \ + (__m256i)__builtin_shufflevector((__v4di)__V, (__v4di) _mm256_setzero_si256(), \ + (M) & 0x3, ((M) & 0xc) >> 2, \ + ((M) & 0x30) >> 4, ((M) & 0xc0) >> 6); }) + +#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m256i __V2 = (V2); \ + (__m256i)__builtin_ia32_permti256(__V1, __V2, (M)); }) + +#define _mm256_extracti128_si256(A, O) __extension__ ({ \ + __m256i __A = (A); \ + (__m128i)__builtin_ia32_extract128i256(__A, (O)); }) + +#define _mm256_inserti128_si256(V1, V2, O) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m128i __V2 = (V2); \ + (__m256i)__builtin_ia32_insert128i256(__V1, __V2, (O)); }) + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_maskload_epi32(int const *__X, __m256i __M) +{ + return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_maskload_epi64(long long const *__X, __m256i __M) +{ + return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, __M); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 
+_mm_maskload_epi32(int const *__X, __m128i __M) +{ + return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maskload_epi64(long long const *__X, __m128i __M) +{ + return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) +{ + __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) +{ + __builtin_ia32_maskstoreq256((__v4di *)__X, __M, __Y); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) +{ + __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) +{ + __builtin_ia32_maskstoreq(( __v2di *)__X, __M, __Y); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sllv_epi32(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sllv_epi32(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_sllv_epi64(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psllv4di(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sllv_epi64(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psllv2di(__X, __Y); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 
+_mm256_srav_epi32(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); +} +/* _mm_srav_epi32: 128-bit form; forwards to __builtin_ia32_psrav4si. Only 32-bit element srav wrappers appear in this part of the header. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srav_epi32(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); +} +/* srlv family: forwards __X and __Y to __builtin_ia32_psrlv{8si,4si,4di,2di}; the 64-bit element forms pass the arguments without a cast. */ +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srlv_epi32(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srlv_epi32(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_srlv_epi64(__m256i __X, __m256i __Y) +{ + return (__m256i)__builtin_ia32_psrlv4di(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srlv_epi64(__m128i __X, __m128i __Y) +{ + return (__m128i)__builtin_ia32_psrlv2di(__X, __Y); +} +/* Masked gathers, double elements. Arguments are (a, m, i, mask, s): a, m, i and mask are copied into typed locals so each is evaluated once, then passed to the gather builtin with s appended unchanged. The "i32" forms take the index vector as __m128i viewed as __v4si; the "i64" forms view it as 64-bit elements. Presumably a is the pass-through source, m the base address, i the indices and s the scale - confirm against the builtin's definition. */ +#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m128d __a = (a); \ + double const *__m = (m); \ + __m128i __i = (i); \ + __m128d __mask = (mask); \ + (__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \ + (__v4si)__i, (__v2df)__mask, (s)); }) + +#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m256d __a = (a); \ + double const *__m = (m); \ + __m128i __i = (i); \ + __m256d __mask = (mask); \ + (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \ + (__v4si)__i, (__v4df)__mask, (s)); }) + +#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \ + __m128d __a = (a); \ + double const *__m = (m); \ + __m128i __i = (i); \ + __m128d __mask = (mask); \ + (__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \ + (__v2di)__i, (__v2df)__mask, (s)); }) + +#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__
({ \ + __m256d __a = (a); \ + double const *__m = (m); \ + __m256i __i = (i); \ + __m256d __mask = (mask); \ + (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \ + (__v4di)__i, (__v4df)__mask, (s)); }) +/* Masked gathers, float elements. Same shape as the double forms: typed local copies of a, m, i and mask, then one builtin call with s appended. */ +#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m128i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherd_ps((__v4sf)__a, (const __v4sf *)__m, \ + (__v4si)__i, (__v4sf)__mask, (s)); }) + +#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m256 __a = (a); \ + float const *__m = (m); \ + __m256i __i = (i); \ + __m256 __mask = (mask); \ + (__m256)__builtin_ia32_gatherd_ps256((__v8sf)__a, (const __v8sf *)__m, \ + (__v8si)__i, (__v8sf)__mask, (s)); }) + +#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m128i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherq_ps((__v4sf)__a, (const __v4sf *)__m, \ + (__v2di)__i, (__v4sf)__mask, (s)); }) +/* _mm256_mask_i64gather_ps: despite the _mm256 prefix, a, mask and the result are 128-bit (__m128); only the index vector i is 256-bit (__m256i viewed as __v4di). */ +#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m256i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherq_ps256((__v4sf)__a, (const __v4sf *)__m, \ + (__v4di)__i, (__v4sf)__mask, (s)); }) +/* Masked gathers, 32-bit integer elements; m is an int const pointer cast to a pointer to const integer vector. */ +#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherd_d((__v4si)__a, (const __v4si *)__m, \ + (__v4si)__i, (__v4si)__mask, (s)); }) + +#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherd_d256((__v8si)__a, (const __v8si *)__m, \ + (__v8si)__i, (__v8si)__mask, (s)); }) + +#define _mm_mask_i64gather_epi32(a, m, i, mask,
s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_d((__v4si)__a, (const __v4si *)__m, \ + (__v2di)__i, (__v4si)__mask, (s)); }) +/* _mm256_mask_i64gather_epi32: a, mask and the result are 128-bit (__m128i as __v4si); only the index vector i is 256-bit (__v4di). */ +#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_d256((__v4si)__a, (const __v4si *)__m, \ + (__v4di)__i, (__v4si)__mask, (s)); }) +/* Masked gathers, 64-bit integer elements; m is a long long const pointer. In the 256-bit i32 form the index vector stays 128-bit (__m128i as __v4si). */ +#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + long long const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \ + (__v4si)__i, (__v2di)__mask, (s)); }) + +#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + long long const *__m = (m); \ + __m128i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \ + (__v4si)__i, (__v4di)__mask, (s)); }) + +#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + long long const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \ + (__v2di)__i, (__v2di)__mask, (s)); }) + +#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + long long const *__m = (m); \ + __m256i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \ + (__v4di)__i, (__v4di)__mask, (s)); }) +/* Unmasked gathers take (m, i, s) and call the same builtins as the masked forms, supplying a zero vector as the first operand and a set1 of -1 as the mask operand. For the floating-point forms the mask is the integer -1 converted to double/float (the value -1.0), not an all-ones bit pattern. NOTE(review): presumably adequate because only the sign bit of each mask element is consulted - confirm against the gather instruction definition. */ +#define _mm_i32gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \ + (const __v2df *)__m, (__v4si)__i, \ + (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); }) + +#define
_mm256_i32gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \ + (const __v4df *)__m, (__v4si)__i, \ + (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); }) +/* 64-bit-index double gathers: same construction as the i32 forms (zero source vector, mask built from -1 converted to double), with the index vector viewed as 64-bit elements. */ +#define _mm_i64gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \ + (const __v2df *)__m, (__v2di)__i, \ + (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m256i __i = (i); \ + (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \ + (const __v4df *)__m, (__v4di)__i, \ + (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); }) +/* Unmasked float gathers: zero source vector, mask built from (float)(int)-1. m and i are each evaluated once via the typed locals __m and __i. */ +#define _mm_i32gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m128i __i = (i); \ + (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v4si)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m256i __i = (i); \ + (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \ + (const __v8sf *)__m, (__v8si)__i, \ + (__v8sf)_mm256_set1_ps((float)(int)-1), (s)); }) + +#define _mm_i64gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m128i __i = (i); \ + (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v2di)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) +/* _mm256_i64gather_ps: the result, zero source and mask are all 128-bit (__m128 / _mm_setzero_ps / _mm_set1_ps); only the index vector is 256-bit. */ +#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m256i __i = (i); \ + (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v4di)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ +
(__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v4si)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) +/* Unmasked 32-bit integer gathers: zero source vector and a mask of set1_epi32(-1), i.e. every mask element has all bits set. */ +#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \ + (const __v8si *)__m, (__v8si)__i, \ + (__v8si)_mm256_set1_epi32(-1), (s)); }) + +#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v2di)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) +/* _mm256_i64gather_epi32: result, zero source and mask are 128-bit; only the index vector is 256-bit (__v4di). */ +#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v4di)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) +/* Unmasked 64-bit integer gathers: zero source vector and a mask of set1_epi64x(-1). The 256-bit i32 form keeps a 128-bit index vector (__m128i as __v4si). */ +#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \ + long long const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \ + (const __v2di *)__m, (__v4si)__i, \ + (__v2di)_mm_set1_epi64x(-1), (s)); }) + +#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \ + long long const *__m = (m); \ + __m128i __i = (i); \ + (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \ + (const __v4di *)__m, (__v4si)__i, \ + (__v4di)_mm256_set1_epi64x(-1), (s)); }) + +#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ + long long const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \ + (const __v2di *)__m, (__v2di)__i, \ + (__v2di)_mm_set1_epi64x(-1), (s)); }) + +#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ + long long const *__m = (m); \ + __m256i __i = (i); \ + (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \ + (const __v4di *)__m, (__v4di)__i, \ + (__v4di)_mm256_set1_epi64x(-1), (s)); }) + +#endif /*
__AVX2INTRIN_H */ diff --git a/python/clang/5.1/include/avxintrin.h b/python/clang/5.1/include/avxintrin.h new file mode 100644 index 00000000..141c4d99 --- /dev/null +++ b/python/clang/5.1/include/avxintrin.h @@ -0,0 +1,1224 @@ +/*===---- avxintrin.h - AVX intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __AVXINTRIN_H +#define __AVXINTRIN_H +/* Element-typed views of a 32-byte vector, used for the casts at builtin call sites below: double x4, float x8, long long x4, int x8, short x16, char x32. */ +typedef double __v4df __attribute__ ((__vector_size__ (32))); +typedef float __v8sf __attribute__ ((__vector_size__ (32))); +typedef long long __v4di __attribute__ ((__vector_size__ (32))); +typedef int __v8si __attribute__ ((__vector_size__ (32))); +typedef short __v16hi __attribute__ ((__vector_size__ (32))); +typedef char __v32qi __attribute__ ((__vector_size__ (32))); +/* Public 256-bit types: __m256 (float elements), __m256d (double elements), __m256i (declared with long long elements). */ +typedef float __m256 __attribute__ ((__vector_size__ (32))); +typedef double __m256d __attribute__((__vector_size__(32))); +typedef long long __m256i __attribute__((__vector_size__(32))); + +/* Arithmetic */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_add_pd(__m256d __a, __m256d __b) +{ + return __a+__b; +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_add_ps(__m256 __a, __m256 __b) +{ + return __a+__b; +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_sub_pd(__m256d __a, __m256d __b) +{ + return __a-__b; +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_sub_ps(__m256 __a, __m256 __b) +{ + return __a-__b; +} +/* add/sub above and div/mul below are written with the plain vector operators; addsub, max and min have no operator form here and call __builtin_ia32_* builtins on the element-typed views. */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_addsub_pd(__m256d __a, __m256d __b) +{ + return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_addsub_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_div_pd(__m256d __a, __m256d __b) +{ + return __a / __b; +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_div_ps(__m256 __a, __m256 __b) +{ + return __a / __b; +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_max_pd(__m256d __a, __m256d __b) +{ + return
(__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b); +} +/* max/min: forward both operands, in (__a, __b) order, to the __builtin_ia32_{max,min}{pd,ps}256 builtins. */ +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_max_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_min_pd(__m256d __a, __m256d __b) +{ + return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_min_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_mul_pd(__m256d __a, __m256d __b) +{ + return __a * __b; +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_mul_ps(__m256 __a, __m256 __b) +{ + return __a * __b; +} +/* Unary wrappers: sqrt is provided for both pd and ps; rsqrt and rcp appear here for ps only. Each forwards its single operand to the like-named builtin. */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_sqrt_pd(__m256d __a) +{ + return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_sqrt_ps(__m256 __a) +{ + return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_rsqrt_ps(__m256 __a) +{ + return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_rcp_ps(__m256 __a) +{ + return (__m256)__builtin_ia32_rcpps256((__v8sf)__a); +} +/* _mm256_round_{pd,ps}(V, M): V is evaluated once into a typed local; M is passed to the rounding builtin unchanged. */ +#define _mm256_round_pd(V, M) __extension__ ({ \ + __m256d __V = (V); \ + (__m256d)__builtin_ia32_roundpd256((__v4df)__V, (M)); }) + +#define _mm256_round_ps(V, M) __extension__ ({ \ + __m256 __V = (V); \ + (__m256)__builtin_ia32_roundps256((__v8sf)__V, (M)); }) +/* ceil/floor are _mm256_round_* with M fixed to _MM_FROUND_CEIL / _MM_FROUND_FLOOR. Those constants are not defined in this part of the header - presumably supplied by another intrinsics header; confirm the include order. */ +#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL) +#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR) +#define _mm256_ceil_ps(V)
_mm256_round_ps((V), _MM_FROUND_CEIL) +#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR) + +/* Logical */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_and_pd(__m256d __a, __m256d __b) +{ + return (__m256d)((__v4di)__a & (__v4di)__b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_and_ps(__m256 __a, __m256 __b) +{ + return (__m256)((__v8si)__a & (__v8si)__b); +} +/* The logical wrappers reinterpret the floating-point vectors as integer vectors (__v4di for pd, __v8si for ps), apply the C bitwise operator, and cast back. andnot complements the FIRST operand: the result is (~__a) & __b. */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_andnot_pd(__m256d __a, __m256d __b) +{ + return (__m256d)(~(__v4di)__a & (__v4di)__b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_andnot_ps(__m256 __a, __m256 __b) +{ + return (__m256)(~(__v8si)__a & (__v8si)__b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_or_pd(__m256d __a, __m256d __b) +{ + return (__m256d)((__v4di)__a | (__v4di)__b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_or_ps(__m256 __a, __m256 __b) +{ + return (__m256)((__v8si)__a | (__v8si)__b); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_xor_pd(__m256d __a, __m256d __b) +{ + return (__m256d)((__v4di)__a ^ (__v4di)__b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_xor_ps(__m256 __a, __m256 __b) +{ + return (__m256)((__v8si)__a ^ (__v8si)__b); +} + +/* Horizontal arithmetic */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_hadd_pd(__m256d __a, __m256d __b) +{ + return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_hadd_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); +} +/* hsub: same calling shape as hadd, forwarding to the __builtin_ia32_hsub{pd,ps}256 builtins. */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_hsub_pd(__m256d __a, __m256d __b) +{ +
return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_hsub_ps(__m256 __a, __m256 __b) +{ + return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); +} + +/* Vector permutations */ +static __inline __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_permutevar_pd(__m128d __a, __m128i __c) +{ + return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_permutevar_pd(__m256d __a, __m256i __c) +{ + return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c); +} + +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_permutevar_ps(__m128 __a, __m128i __c) +{ + return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_permutevar_ps(__m256 __a, __m256i __c) +{ + return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, + (__v8si)__c); +} +/* Immediate permutes are written as __builtin_shufflevector of A against a zero vector. Every index computed from C is smaller than the element count, so only elements of A are selected. A is evaluated once; C is expanded once per index. In the 256-bit forms the upper-half indices add 2 (pd) or 4 (ps), so each 128-bit half selects only from its own half; _mm256_permute_ps reuses the same four 2-bit fields of C for both halves. */ +#define _mm_permute_pd(A, C) __extension__ ({ \ + __m128d __A = (A); \ + (__m128d)__builtin_shufflevector((__v2df)__A, (__v2df) _mm_setzero_pd(), \ + (C) & 0x1, ((C) & 0x2) >> 1); }) + +#define _mm256_permute_pd(A, C) __extension__ ({ \ + __m256d __A = (A); \ + (__m256d)__builtin_shufflevector((__v4df)__A, (__v4df) _mm256_setzero_pd(), \ + (C) & 0x1, ((C) & 0x2) >> 1, \ + 2 + (((C) & 0x4) >> 2), \ + 2 + (((C) & 0x8) >> 3)); }) + +#define _mm_permute_ps(A, C) __extension__ ({ \ + __m128 __A = (A); \ + (__m128)__builtin_shufflevector((__v4sf)__A, (__v4sf) _mm_setzero_ps(), \ + (C) & 0x3, ((C) & 0xc) >> 2, \ + ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6); }) + +#define _mm256_permute_ps(A, C) __extension__ ({ \ + __m256 __A = (A); \ + (__m256)__builtin_shufflevector((__v8sf)__A, (__v8sf) _mm256_setzero_ps(), \ + (C) & 0x3, ((C) & 0xc) >> 2, \ + ((C) & 0x30) >> 4, ((C) & 0xc0) >> 6, \ + 4 + (((C) &
0x03) >> 0), \ + 4 + (((C) & 0x0c) >> 2), \ + 4 + (((C) & 0x30) >> 4), \ + 4 + (((C) & 0xc0) >> 6)); }) +/* _mm256_permute2f128_{pd,ps,si256}(V1, V2, M): both vectors are copied into typed locals (one evaluation each) and passed with the selector M to the __builtin_ia32_vperm2f128_* builtin; the si256 form views its operands as __v8si. */ +#define _mm256_permute2f128_pd(V1, V2, M) __extension__ ({ \ + __m256d __V1 = (V1); \ + __m256d __V2 = (V2); \ + (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)__V1, (__v4df)__V2, (M)); }) + +#define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \ + __m256 __V1 = (V1); \ + __m256 __V2 = (V2); \ + (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) + +#define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m256i __V2 = (V2); \ + (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)__V1, (__v8si)__V2, (M)); }) + +/* Vector Blend */ +#define _mm256_blend_pd(V1, V2, M) __extension__ ({ \ + __m256d __V1 = (V1); \ + __m256d __V2 = (V2); \ + (__m256d)__builtin_ia32_blendpd256((__v4df)__V1, (__v4df)__V2, (M)); }) + +#define _mm256_blend_ps(V1, V2, M) __extension__ ({ \ + __m256 __V1 = (V1); \ + __m256 __V2 = (V2); \ + (__m256)__builtin_ia32_blendps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) +/* blendv: the selector is a third vector operand (__c) rather than an immediate, so these are real functions instead of macros. */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) +{ + return (__m256d)__builtin_ia32_blendvpd256( + (__v4df)__a, (__v4df)__b, (__v4df)__c); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) +{ + return (__m256)__builtin_ia32_blendvps256( + (__v8sf)__a, (__v8sf)__b, (__v8sf)__c); +} + +/* Vector Dot Product */ +#define _mm256_dp_ps(V1, V2, M) __extension__ ({ \ + __m256 __V1 = (V1); \ + __m256 __V2 = (V2); \ + (__m256)__builtin_ia32_dpps256((__v8sf)__V1, (__v8sf)__V2, (M)); }) + +/* Vector shuffle */ +#define _mm256_shuffle_ps(a, b, mask) __extension__ ({ \ + __m256 __a = (a); \ + __m256 __b = (b); \ + (__m256)__builtin_shufflevector((__v8sf)__a, (__v8sf)__b, \ + (mask) & 0x3, ((mask) & 0xc) >> 2, \ + (((mask) & 0x30) >> 4) + 8, (((mask) & 0xc0) >> 6) + 8, \ +
((mask) & 0x3) + 4, (((mask) & 0xc) >> 2) + 4, \ + (((mask) & 0x30) >> 4) + 12, (((mask) & 0xc0) >> 6) + 12); }) +/* _mm256_shuffle_pd(a, b, mask): one mask bit per result element. The offsets added to each bit (0, +4, +2, +6) make the four shuffle indices fall in 0..1, 4..5, 2..3 and 6..7, i.e. results alternate between __a and __b and each 128-bit half draws from the matching half of its source. */ +#define _mm256_shuffle_pd(a, b, mask) __extension__ ({ \ + __m256d __a = (a); \ + __m256d __b = (b); \ + (__m256d)__builtin_shufflevector((__v4df)__a, (__v4df)__b, \ + (mask) & 0x1, \ + (((mask) & 0x2) >> 1) + 4, \ + (((mask) & 0x4) >> 2) + 2, \ + (((mask) & 0x8) >> 3) + 6); }) + +/* Compare */ +#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ +#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ +#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */ +#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */ +#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */ +#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */ +#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */ +#define _CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */ +#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */ +#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */ +#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */ +#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */ +#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */ +#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */ +#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */ +#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */ +#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */ +#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */ +#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */ +#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */ +#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */ +#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */ +#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */ +#define _CMP_ORD_S 0x17 /* Ordered
(signaling) */ +#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */ +#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */ +#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */ +#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */ +#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */ +#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */ +#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */ +#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */ +/* Compare macros: a and b are copied into typed locals (one evaluation each) and passed with the predicate c, unchanged, to the __builtin_ia32_cmp* builtin. The _CMP_* constants cover 0x00..0x1f; presumably c is expected to be one of them - confirm against the builtin's contract. */ +#define _mm_cmp_pd(a, b, c) __extension__ ({ \ + __m128d __a = (a); \ + __m128d __b = (b); \ + (__m128d)__builtin_ia32_cmppd((__v2df)__a, (__v2df)__b, (c)); }) + +#define _mm_cmp_ps(a, b, c) __extension__ ({ \ + __m128 __a = (a); \ + __m128 __b = (b); \ + (__m128)__builtin_ia32_cmpps((__v4sf)__a, (__v4sf)__b, (c)); }) + +#define _mm256_cmp_pd(a, b, c) __extension__ ({ \ + __m256d __a = (a); \ + __m256d __b = (b); \ + (__m256d)__builtin_ia32_cmppd256((__v4df)__a, (__v4df)__b, (c)); }) + +#define _mm256_cmp_ps(a, b, c) __extension__ ({ \ + __m256 __a = (a); \ + __m256 __b = (b); \ + (__m256)__builtin_ia32_cmpps256((__v8sf)__a, (__v8sf)__b, (c)); }) +/* _mm_cmp_sd / _mm_cmp_ss: same shape, forwarding to the cmpsd / cmpss builtins; operands and result keep the full 128-bit vector types. */ +#define _mm_cmp_sd(a, b, c) __extension__ ({ \ + __m128d __a = (a); \ + __m128d __b = (b); \ + (__m128d)__builtin_ia32_cmpsd((__v2df)__a, (__v2df)__b, (c)); }) + +#define _mm_cmp_ss(a, b, c) __extension__ ({ \ + __m128 __a = (a); \ + __m128 __b = (b); \ + (__m128)__builtin_ia32_cmpss((__v4sf)__a, (__v4sf)__b, (c)); }) + +/* Vector extract */ +#define _mm256_extractf128_pd(A, O) __extension__ ({ \ + __m256d __A = (A); \ + (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)__A, (O)); }) + +#define _mm256_extractf128_ps(A, O) __extension__ ({ \ + __m256 __A = (A); \ + (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)__A, (O)); }) + +#define _mm256_extractf128_si256(A, O) __extension__ ({ \ + __m256i __A = (A); \ + (__m128i)__builtin_ia32_vextractf128_si256((__v8si)__A,
(O)); }) +/* Element extract: the vector is viewed with the requested element type and indexed with __imm masked to the valid range (& 7, & 15, & 31, & 3), so an out-of-range __imm wraps instead of reading out of bounds. */ +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_extract_epi32(__m256i __a, int const __imm) +{ + __v8si __b = (__v8si)__a; + return __b[__imm & 7]; +} +/* NOTE(review): the 16- and 8-bit extracts read a short / char element and return it as int, so a short element with its top bit set comes back negative (sign-extended); for the char form this depends on whether plain char is signed on the target. Confirm this matches the intended contract for these intrinsics. */ +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_extract_epi16(__m256i __a, int const __imm) +{ + __v16hi __b = (__v16hi)__a; + return __b[__imm & 15]; +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_extract_epi8(__m256i __a, int const __imm) +{ + __v32qi __b = (__v32qi)__a; + return __b[__imm & 31]; +} +/* The 64-bit element extract is only declared when __x86_64__ is defined. */ +#ifdef __x86_64__ +static __inline long long __attribute__((__always_inline__, __nodebug__)) +_mm256_extract_epi64(__m256i __a, const int __imm) +{ + __v4di __b = (__v4di)__a; + return __b[__imm & 3]; +} +#endif + +/* Vector insert */ +#define _mm256_insertf128_pd(V1, V2, O) __extension__ ({ \ + __m256d __V1 = (V1); \ + __m128d __V2 = (V2); \ + (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)__V1, (__v2df)__V2, (O)); }) + +#define _mm256_insertf128_ps(V1, V2, O) __extension__ ({ \ + __m256 __V1 = (V1); \ + __m128 __V2 = (V2); \ + (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)__V1, (__v4sf)__V2, (O)); }) + +#define _mm256_insertf128_si256(V1, V2, O) __extension__ ({ \ + __m256i __V1 = (V1); \ + __m128i __V2 = (V2); \ + (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)__V1, (__v4si)__V2, (O)); }) +/* Element insert: works on a by-value copy (__c) of the vector, overwrites the element at the masked index with __b (converted to the element type by the assignment), and returns the modified copy. */ +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_insert_epi32(__m256i __a, int __b, int const __imm) +{ + __v8si __c = (__v8si)__a; + __c[__imm & 7] = __b; + return (__m256i)__c; +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_insert_epi16(__m256i __a, int __b, int const __imm) +{ + __v16hi __c = (__v16hi)__a; + __c[__imm & 15] = __b; + return (__m256i)__c; +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_insert_epi8(__m256i __a, int __b, int const __imm) +{ + __v32qi __c = (__v32qi)__a; + __c[__imm & 31] =
__b; + return (__m256i)__c; +} +/* _mm256_insert_epi64: returns a copy of __a with the 64-bit element at index (__imm & 3) replaced by __b. __b is declared long long so the full 64-bit value reaches the element; an int parameter would truncate it to 32 bits at the call boundary. Only declared when __x86_64__ is defined. */ +#ifdef __x86_64__ +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_insert_epi64(__m256i __a, long long __b, int const __imm) +{ + __v4di __c = (__v4di)__a; + __c[__imm & 3] = __b; + return (__m256i)__c; +} +#endif + +/* Conversion */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi32_pd(__m128i __a) +{ + return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtepi32_ps(__m256i __a) +{ + return (__m256)__builtin_ia32_cvtdq2ps256((__v8si) __a); +} +/* Conversions that change element width also change vector width: four doubles (__m256d) pair with four floats or four 32-bit ints in a 128-bit vector, which is why _mm256_cvtpd_ps, _mm256_cvtpd_epi32 and _mm256_cvttpd_epi32 return 128-bit types and _mm256_cvtepi32_pd / _mm256_cvtps_pd take 128-bit arguments. */ +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtpd_ps(__m256d __a) +{ + return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a); +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtps_epi32(__m256 __a) +{ + return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtps_pd(__m128 __a) +{ + return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a); +} +/* The cvtt* wrappers call the cvtt* builtins and the cvt* wrappers the cvt* builtins; presumably the extra "t" selects truncating conversion - confirm against the builtin definitions. */ +static __inline __m128i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvttpd_epi32(__m256d __a) +{ + return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); +} + +static __inline __m128i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtpd_epi32(__m256d __a) +{ + return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a); +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cvttps_epi32(__m256 __a) +{ + return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); +} + +/* Vector replicate */ +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_movehdup_ps(__m256 __a) +{ + return __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_moveldup_ps(__m256 __a) +{ + return
__builtin_shufflevector(__a, __a, 0, 0, 2, 2, 4, 4, 6, 6); +} +/* _mm256_movedup_pd: duplicates the even-indexed elements - result is { a[0], a[0], a[2], a[2] }. */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_movedup_pd(__m256d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); +} + +/* Unpack and Interleave */ +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_unpackhi_pd(__m256d __a, __m256d __b) +{ + return __builtin_shufflevector(__a, __b, 1, 5, 1+2, 5+2); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_unpacklo_pd(__m256d __a, __m256d __b) +{ + return __builtin_shufflevector(__a, __b, 0, 4, 0+2, 4+2); +} +/* In the two-operand shuffles, indices below the element count pick from __a and the rest pick from __b. For the ps unpacks that yields { a2, b2, a3, b3, a6, b6, a7, b7 } (hi) and { a0, b0, a1, b1, a4, b4, a5, b5 } (lo): interleaving is done within each 128-bit half separately. */ +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_unpackhi_ps(__m256 __a, __m256 __b) +{ + return __builtin_shufflevector(__a, __b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_unpacklo_ps(__m256 __a, __m256 __b) +{ + return __builtin_shufflevector(__a, __b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1); +} + +/* Bit Test */ +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm_testz_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm_testc_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm_testnzc_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); +} +/* Each bit-test wrapper returns the int produced by the vtest{z,c,nzc} builtin of matching element type and width; presumably the z / c / nzc suffix names the flag condition reported - confirm against the builtin definitions. */ +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm_testz_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm_testc_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); +} + +static __inline int __attribute__((__always_inline__,
__nodebug__)) +_mm_testnzc_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); +} +/* 256-bit floating-point bit tests: same shape as the 128-bit forms, forwarding to the *256 builtins. */ +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testz_pd(__m256d __a, __m256d __b) +{ + return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testc_pd(__m256d __a, __m256d __b) +{ + return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testnzc_pd(__m256d __a, __m256d __b) +{ + return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testz_ps(__m256 __a, __m256 __b) +{ + return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testc_ps(__m256 __a, __m256 __b) +{ + return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testnzc_ps(__m256 __a, __m256 __b) +{ + return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); +} +/* Integer bit tests use the ptest{z,c,nzc}256 builtins (not the vtest* ones) with both operands viewed as __v4di. */ +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testz_si256(__m256i __a, __m256i __b) +{ + return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testc_si256(__m256i __a, __m256i __b) +{ + return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); +} + +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_testnzc_si256(__m256i __a, __m256i __b) +{ + return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); +} + +/* Vector extract sign mask */ +static __inline int __attribute__((__always_inline__, __nodebug__)) +_mm256_movemask_pd(__m256d __a) +{ + return __builtin_ia32_movmskpd256((__v4df)__a); +} + +static __inline int
__attribute__((__always_inline__, __nodebug__))
+_mm256_movemask_ps(__m256 __a)
+{
+  return __builtin_ia32_movmskps256((__v8sf)__a);
+}
+
+/* Vector __zero: both functions take no arguments and only invoke the
+ * vzeroall / vzeroupper builtin. */
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_zeroall(void)
+{
+  __builtin_ia32_vzeroall();
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_zeroupper(void)
+{
+  __builtin_ia32_vzeroupper();
+}
+
+/* Vector load with broadcast: each wrapper passes the source pointer to the
+ * matching vbroadcast builtin and casts the result to the return type. */
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_broadcast_ss(float const *__a)
+{
+  return (__m128)__builtin_ia32_vbroadcastss(__a);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_broadcast_sd(double const *__a)
+{
+  return (__m256d)__builtin_ia32_vbroadcastsd256(__a);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_broadcast_ss(float const *__a)
+{
+  return (__m256)__builtin_ia32_vbroadcastss256(__a);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_broadcast_pd(__m128d const *__a)
+{
+  return (__m256d)__builtin_ia32_vbroadcastf128_pd256(__a);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_broadcast_ps(__m128 const *__a)
+{
+  return (__m256)__builtin_ia32_vbroadcastf128_ps256(__a);
+}
+
+/* SIMD load ops
+ *
+ * _mm256_load_pd / _mm256_load_ps dereference __p as a whole vector, so the
+ * access is made with the vector type's own alignment.  The loadu forms read
+ * through a local struct marked packed and may_alias instead, which drops
+ * the alignment and aliasing assumptions for that access.
+ */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_load_pd(double const *__p)
+{
+  return *(__m256d *)__p;
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_load_ps(float const *__p)
+{
+  return *(__m256 *)__p;
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_loadu_pd(double const *__p)
+{
+  struct __loadu_pd {
+    __m256d __v;
+  } __attribute__((packed, may_alias));
+  return ((struct __loadu_pd*)__p)->__v;
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_loadu_ps(float const *__p)
+{
+  struct __loadu_ps 
{ + __m256 __v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_ps*)__p)->__v; +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_load_si256(__m256i const *__p) +{ + return *__p; +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_loadu_si256(__m256i const *__p) +{ + struct __loadu_si256 { + __m256i __v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_si256*)__p)->__v; +} + +static __inline __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_lddqu_si256(__m256i const *__p) +{ + return (__m256i)__builtin_ia32_lddqu256((char const *)__p); +} + +/* SIMD store ops */ +static __inline void __attribute__((__always_inline__, __nodebug__)) +_mm256_store_pd(double *__p, __m256d __a) +{ + *(__m256d *)__p = __a; +} + +static __inline void __attribute__((__always_inline__, __nodebug__)) +_mm256_store_ps(float *__p, __m256 __a) +{ + *(__m256 *)__p = __a; +} + +static __inline void __attribute__((__always_inline__, __nodebug__)) +_mm256_storeu_pd(double *__p, __m256d __a) +{ + __builtin_ia32_storeupd256(__p, (__v4df)__a); +} + +static __inline void __attribute__((__always_inline__, __nodebug__)) +_mm256_storeu_ps(float *__p, __m256 __a) +{ + __builtin_ia32_storeups256(__p, (__v8sf)__a); +} + +static __inline void __attribute__((__always_inline__, __nodebug__)) +_mm256_store_si256(__m256i *__p, __m256i __a) +{ + *__p = __a; +} + +static __inline void __attribute__((__always_inline__, __nodebug__)) +_mm256_storeu_si256(__m256i *__p, __m256i __a) +{ + __builtin_ia32_storedqu256((char *)__p, (__v32qi)__a); +} + +/* Conditional load ops */ +static __inline __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_maskload_pd(double const *__p, __m128d __m) +{ + return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2df)__m); +} + +static __inline __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_maskload_pd(double const *__p, __m256d __m) 
+{
+  return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,
+                                               (__v4df)__m);
+}
+
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_maskload_ps(float const *__p, __m128 __m)
+{
+  return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4sf)__m);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_maskload_ps(float const *__p, __m256 __m)
+{
+  return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8sf)__m);
+}
+
+/* Conditional store ops: the builtin receives, in order, the destination
+ * pointer, the mask __m and the data __a. */
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_maskstore_ps(float *__p, __m256 __m, __m256 __a)
+{
+  __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8sf)__m, (__v8sf)__a);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm_maskstore_pd(double *__p, __m128d __m, __m128d __a)
+{
+  __builtin_ia32_maskstorepd((__v2df *)__p, (__v2df)__m, (__v2df)__a);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_maskstore_pd(double *__p, __m256d __m, __m256d __a)
+{
+  __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4df)__m, (__v4df)__a);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm_maskstore_ps(float *__p, __m128 __m, __m128 __a)
+{
+  __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4sf)__m, (__v4sf)__a);
+}
+
+/* Cacheability support ops: each wrapper forwards the destination pointer
+ * and the value to the movnt builtin for its vector type. */
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_stream_si256(__m256i *__a, __m256i __b)
+{
+  __builtin_ia32_movntdq256((__v4di *)__a, (__v4di)__b);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_stream_pd(double *__a, __m256d __b)
+{
+  __builtin_ia32_movntpd256(__a, (__v4df)__b);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_stream_ps(float *__p, __m256 __a)
+{
+  __builtin_ia32_movntps256(__p, (__v8sf)__a);
+}
+
+/* Create vectors: the set functions list their arguments in reverse inside
+ * the initializer, so the last argument becomes element 0. */
+static __inline __m256d __attribute__((__always_inline__, 
__nodebug__))
+_mm256_set_pd(double __a, double __b, double __c, double __d)
+{
+  return (__m256d){ __d, __c, __b, __a };
+}
+/* Eight floats; __h becomes element 0 and __a element 7. */
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_set_ps(float __a, float __b, float __c, float __d,
+              float __e, float __f, float __g, float __h)
+{
+  return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
+}
+/* Eight ints; __i7 becomes element 0 and __i0 element 7. */
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
+                 int __i4, int __i5, int __i6, int __i7)
+{
+  return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
+}
+/* Sixteen shorts; the parameter number is the element index (__w00 is
+ * element 0, __w15 is element 15). */
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
+                 short __w11, short __w10, short __w09, short __w08,
+                 short __w07, short __w06, short __w05, short __w04,
+                 short __w03, short __w02, short __w01, short __w00)
+{
+  return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
+    __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
+}
+/* Thirty-two chars; the parameter number is the element index (__b00 is
+ * element 0, __b31 is element 31). */
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
+                char __b27, char __b26, char __b25, char __b24,
+                char __b23, char __b22, char __b21, char __b20,
+                char __b19, char __b18, char __b17, char __b16,
+                char __b15, char __b14, char __b13, char __b12,
+                char __b11, char __b10, char __b09, char __b08,
+                char __b07, char __b06, char __b05, char __b04,
+                char __b03, char __b02, char __b01, char __b00)
+{
+  return (__m256i)(__v32qi){
+    __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
+    __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,
+    __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,
+    __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31
+  };
+}
+/* Four 64-bit integers; __d becomes element 0 and __a element 3. */
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_epi64x(long long __a, long long __b, long long __c, long long 
__d)
+{
+  return (__m256i)(__v4di){ __d, __c, __b, __a };
+}
+
+/* Create vectors with elements in reverse order
+ *
+ * The setr functions store their arguments in the order given: the first
+ * argument becomes element 0.  Note that _mm256_setr_epi16 and
+ * _mm256_setr_epi8 keep the descending parameter names of the set variants,
+ * so here the parameter called __w15 / __b31 is the one that lands in
+ * element 0.
+ */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_pd(double __a, double __b, double __c, double __d)
+{
+  return (__m256d){ __a, __b, __c, __d };
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_ps(float __a, float __b, float __c, float __d,
+               float __e, float __f, float __g, float __h)
+{
+  return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
+                  int __i4, int __i5, int __i6, int __i7)
+{
+  return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
+                  short __w11, short __w10, short __w09, short __w08,
+                  short __w07, short __w06, short __w05, short __w04,
+                  short __w03, short __w02, short __w01, short __w00)
+{
+  return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
+    __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
+                 char __b27, char __b26, char __b25, char __b24,
+                 char __b23, char __b22, char __b21, char __b20,
+                 char __b19, char __b18, char __b17, char __b16,
+                 char __b15, char __b14, char __b13, char __b12,
+                 char __b11, char __b10, char __b09, char __b08,
+                 char __b07, char __b06, char __b05, char __b04,
+                 char __b03, char __b02, char __b01, char __b00)
+{
+  return (__m256i)(__v32qi){
+    __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
+    __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
+    __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
+    __b07, __b06, __b05, __b04, 
__b03, __b02, __b01, __b00 };
+}
+/* Four 64-bit integers stored in argument order: __a becomes element 0. */
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)
+{
+  return (__m256i)(__v4di){ __a, __b, __c, __d };
+}
+
+/* Create vectors with repeated elements: every element of the result is a
+ * copy of the single argument. */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_pd(double __w)
+{
+  return (__m256d){ __w, __w, __w, __w };
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_ps(float __w)
+{
+  return (__m256){ __w, __w, __w, __w, __w, __w, __w, __w };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_epi32(int __i)
+{
+  return (__m256i)(__v8si){ __i, __i, __i, __i, __i, __i, __i, __i };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_epi16(short __w)
+{
+  return (__m256i)(__v16hi){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w,
+    __w, __w, __w, __w, __w, __w };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_epi8(char __b)
+{
+  return (__m256i)(__v32qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
+    __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
+    __b, __b, __b, __b, __b, __b, __b };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set1_epi64x(long long __q)
+{
+  return (__m256i)(__v4di){ __q, __q, __q, __q };
+}
+
+/* Create __zeroed vectors: all elements are initialized to zero. */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_setzero_pd(void)
+{
+  return (__m256d){ 0, 0, 0, 0 };
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_setzero_ps(void)
+{
+  return (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setzero_si256(void)
+{
+  return (__m256i){ 0LL, 0LL, 0LL, 0LL };
+}
+
+/* Cast between vector types */
+static 
__inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_castpd_ps(__m256d __a)
+{
+  return (__m256)__a;
+}
+/* The same-width casts here are plain C vector casts of the argument. */
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_castpd_si256(__m256d __a)
+{
+  return (__m256i)__a;
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_castps_pd(__m256 __a)
+{
+  return (__m256d)__a;
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_castps_si256(__m256 __a)
+{
+  return (__m256i)__a;
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_castsi256_ps(__m256i __a)
+{
+  return (__m256)__a;
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_castsi256_pd(__m256i __a)
+{
+  return (__m256d)__a;
+}
+/* 256 -> 128 bit casts: the shuffle keeps only the lowest elements of __a
+ * (indices 0-1, or 0-3 for floats). */
+static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+_mm256_castpd256_pd128(__m256d __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1);
+}
+
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm256_castps256_ps128(__m256 __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
+}
+
+static __inline __m128i __attribute__((__always_inline__, __nodebug__))
+_mm256_castsi256_si128(__m256i __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1);
+}
+/* 128 -> 256 bit casts: __a supplies the low elements; the upper elements
+ * use shuffle index -1, i.e. their value is left unspecified. */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_castpd128_pd256(__m128d __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_castps128_ps256(__m128 __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_castsi128_si256(__m128i __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1, -1, -1);
+}
+
+/* SIMD load ops (unaligned): two independent 128-bit loads, __addr_lo into
+ * the low half and __addr_hi inserted at position 1. */
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_loadu2_m128(float const 
*__addr_hi, float const *__addr_lo)
+{
+  struct __loadu_ps {
+    __m128 __v;
+  } __attribute__((__packed__, __may_alias__));
+
+  __m256 __v256 = _mm256_castps128_ps256(((struct __loadu_ps*)__addr_lo)->__v);
+  return _mm256_insertf128_ps(__v256, ((struct __loadu_ps*)__addr_hi)->__v, 1);
+}
+/* Double-precision form: low half read from __addr_lo, then the 128 bits at
+ * __addr_hi are inserted at position 1. */
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
+{
+  struct __loadu_pd {
+    __m128d __v;
+  } __attribute__((__packed__, __may_alias__));
+
+  __m256d __v256 = _mm256_castpd128_pd256(((struct __loadu_pd*)__addr_lo)->__v);
+  return _mm256_insertf128_pd(__v256, ((struct __loadu_pd*)__addr_hi)->__v, 1);
+}
+/* Integer form of the same two-part load. */
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)
+{
+  struct __loadu_si128 {
+    __m128i __v;
+  } __attribute__((packed, may_alias));
+  __m256i __v256 = _mm256_castsi128_si256(
+    ((struct __loadu_si128*)__addr_lo)->__v);
+  return _mm256_insertf128_si256(__v256,
+                                 ((struct __loadu_si128*)__addr_hi)->__v, 1);
+}
+
+/* SIMD store ops (unaligned): the low 128 bits of __a are stored to
+ * __addr_lo first, then the 128 bits extracted at position 1 are stored to
+ * __addr_hi, each through the unaligned-store builtin. */
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)
+{
+  __m128 __v128;
+
+  __v128 = _mm256_castps256_ps128(__a);
+  __builtin_ia32_storeups(__addr_lo, __v128);
+  __v128 = _mm256_extractf128_ps(__a, 1);
+  __builtin_ia32_storeups(__addr_hi, __v128);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)
+{
+  __m128d __v128;
+
+  __v128 = _mm256_castpd256_pd128(__a);
+  __builtin_ia32_storeupd(__addr_lo, __v128);
+  __v128 = _mm256_extractf128_pd(__a, 1);
+  __builtin_ia32_storeupd(__addr_hi, __v128);
+}
+
+static __inline void __attribute__((__always_inline__, __nodebug__))
+_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)
+{
+  __m128i __v128;
+
+  __v128 = 
_mm256_castsi256_si128(__a); + __builtin_ia32_storedqu((char *)__addr_lo, (__v16qi)__v128); + __v128 = _mm256_extractf128_si256(__a, 1); + __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128); +} + +#endif /* __AVXINTRIN_H */ diff --git a/python/clang/5.1/include/bmi2intrin.h b/python/clang/5.1/include/bmi2intrin.h new file mode 100644 index 00000000..a05cfad3 --- /dev/null +++ b/python/clang/5.1/include/bmi2intrin.h @@ -0,0 +1,94 @@ +/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __BMI2__ +# error "BMI2 instruction set not enabled" +#endif /* __BMI2__ */ + +#ifndef __BMI2INTRIN_H +#define __BMI2INTRIN_H + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_bzhi_u32(unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_bzhi_si(__X, __Y); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_pdep_u32(unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_pdep_si(__X, __Y); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_pext_u32(unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_pext_si(__X, __Y); +} + +#ifdef __x86_64__ + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_bzhi_u64(unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_bzhi_di(__X, __Y); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_pdep_u64(unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_pdep_di(__X, __Y); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_pext_u64(unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_pext_di(__X, __Y); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_mulx_u64 (unsigned long long __X, unsigned long long __Y, + unsigned long long *__P) +{ + unsigned __int128 __res = (unsigned __int128) __X * __Y; + *__P = (unsigned long long) (__res >> 64); + return (unsigned long long) __res; +} + +#else /* !__x86_64__ */ + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P) +{ + unsigned long long __res = (unsigned long long) __X * __Y; + *__P = (unsigned int) (__res >> 32); + return (unsigned int) __res; +} + +#endif /* !__x86_64__ */ + +#endif /* __BMI2INTRIN_H */ diff --git 
a/python/clang/5.1/include/bmiintrin.h b/python/clang/5.1/include/bmiintrin.h new file mode 100644 index 00000000..8cb00f51 --- /dev/null +++ b/python/clang/5.1/include/bmiintrin.h @@ -0,0 +1,115 @@ +/*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __BMI__ +# error "BMI instruction set not enabled" +#endif /* __BMI__ */ + +#ifndef __BMIINTRIN_H +#define __BMIINTRIN_H + +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +__tzcnt_u16(unsigned short __X) +{ + return __builtin_ctzs(__X); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__andn_u32(unsigned int __X, unsigned int __Y) +{ + return ~__X & __Y; +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__bextr_u32(unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_bextr_u32(__X, __Y); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blsi_u32(unsigned int __X) +{ + return __X & -__X; +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blsmsk_u32(unsigned int __X) +{ + return __X ^ (__X - 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blsr_u32(unsigned int __X) +{ + return __X & (__X - 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__tzcnt_u32(unsigned int __X) +{ + return __builtin_ctz(__X); +} + +#ifdef __x86_64__ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__andn_u64 (unsigned long long __X, unsigned long long __Y) +{ + return ~__X & __Y; +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__bextr_u64(unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_bextr_u64(__X, __Y); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__blsi_u64(unsigned long long __X) +{ + return __X & -__X; +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__blsmsk_u64(unsigned long long __X) +{ + return __X ^ (__X - 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, 
__nodebug__)) +__blsr_u64(unsigned long long __X) +{ + return __X & (__X - 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__tzcnt_u64(unsigned long long __X) +{ + return __builtin_ctzll(__X); +} +#endif + +#endif /* __BMIINTRIN_H */ diff --git a/python/clang/5.1/include/cpuid.h b/python/clang/5.1/include/cpuid.h new file mode 100644 index 00000000..8f12caeb --- /dev/null +++ b/python/clang/5.1/include/cpuid.h @@ -0,0 +1,156 @@ +/*===---- cpuid.h - X86 cpu model detection --------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#if !(__x86_64__ || __i386__)
+#error this header is for x86 only
+#endif
+
+/* Features in %ecx for level 1.  Every bit_* macro is a single-bit mask. */
+#define bit_SSE3 0x00000001
+#define bit_PCLMULQDQ 0x00000002
+#define bit_DTES64 0x00000004
+#define bit_MONITOR 0x00000008
+#define bit_DSCPL 0x00000010
+#define bit_VMX 0x00000020
+#define bit_SMX 0x00000040
+#define bit_EIST 0x00000080
+#define bit_TM2 0x00000100
+#define bit_SSSE3 0x00000200
+#define bit_CNXTID 0x00000400
+#define bit_FMA 0x00001000
+#define bit_CMPXCHG16B 0x00002000
+#define bit_xTPR 0x00004000
+#define bit_PDCM 0x00008000
+#define bit_PCID 0x00020000
+#define bit_DCA 0x00040000
+#define bit_SSE41 0x00080000
+#define bit_SSE42 0x00100000
+#define bit_x2APIC 0x00200000
+#define bit_MOVBE 0x00400000
+#define bit_POPCNT 0x00800000
+#define bit_TSCDeadline 0x01000000
+#define bit_AESNI 0x02000000
+#define bit_XSAVE 0x04000000
+#define bit_OSXSAVE 0x08000000
+#define bit_AVX 0x10000000
+#define bit_RDRAND 0x40000000
+
+/* Features in %edx for level 1 (single-bit masks, as above). */
+#define bit_FPU 0x00000001
+#define bit_VME 0x00000002
+#define bit_DE 0x00000004
+#define bit_PSE 0x00000008
+#define bit_TSC 0x00000010
+#define bit_MSR 0x00000020
+#define bit_PAE 0x00000040
+#define bit_MCE 0x00000080
+#define bit_CX8 0x00000100
+#define bit_APIC 0x00000200
+#define bit_SEP 0x00000800
+#define bit_MTRR 0x00001000
+#define bit_PGE 0x00002000
+#define bit_MCA 0x00004000
+#define bit_CMOV 0x00008000
+#define bit_PAT 0x00010000
+#define bit_PSE36 0x00020000
+#define bit_PSN 0x00040000
+#define bit_CLFSH 0x00080000
+#define bit_DS 0x00200000
+#define bit_ACPI 0x00400000
+#define bit_MMX 0x00800000
+#define bit_FXSR 0x01000000
+#define bit_SSE 0x02000000
+#define bit_SSE2 0x04000000
+#define bit_SS 0x08000000
+#define bit_HTT 0x10000000
+#define bit_TM 0x20000000
+#define bit_PBE 0x80000000
+
+/* Features in %ebx for level 7 sub-leaf 0 */
+#define bit_FSGSBASE 0x00000001
+#define 
bit_SMEP 0x00000080 +#define bit_ENH_MOVSB 0x00000200 + +/* PIC on i386 uses %ebx, so preserve it. */ +#if __i386__ +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm(" pushl %%ebx\n" \ + " cpuid\n" \ + " mov %%ebx,%1\n" \ + " popl %%ebx" \ + : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm(" pushl %%ebx\n" \ + " cpuid\n" \ + " mov %%ebx,%1\n" \ + " popl %%ebx" \ + : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#else +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#endif + +static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, + unsigned int *__ebx, unsigned int *__ecx, + unsigned int *__edx) { + __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx); + return 1; +} + +static __inline int __get_cpuid_max (unsigned int __level, unsigned int *__sig) +{ + unsigned int __eax, __ebx, __ecx, __edx; +#if __i386__ + int __cpuid_supported; + + __asm(" pushfl\n" + " popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x00200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " movl $0,%0\n" + " cmpl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + "1:" + : "=r" (__cpuid_supported) : : "eax", "ecx"); + if (!__cpuid_supported) + return 0; +#endif + + __cpuid(__level, __eax, __ebx, __ecx, __edx); + if (__sig) + *__sig = __ebx; + return __eax; +} diff --git a/python/clang/5.1/include/emmintrin.h b/python/clang/5.1/include/emmintrin.h new file mode 100644 index 00000000..b3f85695 --- /dev/null +++ b/python/clang/5.1/include/emmintrin.h @@ -0,0 +1,1451 @@ +/*===---- emmintrin.h - SSE2 intrinsics 
------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __EMMINTRIN_H +#define __EMMINTRIN_H + +#ifndef __SSE2__ +#error "SSE2 instruction set not enabled" +#else + +#include + +typedef double __m128d __attribute__((__vector_size__(16))); +typedef long long __m128i __attribute__((__vector_size__(16))); + +/* Type defines. 
*/
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef short __v8hi __attribute__((__vector_size__(16)));
+typedef char __v16qi __attribute__((__vector_size__(16)));
+/* Arithmetic.  The _sd forms operate on element 0 only and return __a with
+ * its element 1 unchanged; the _pd forms apply the operator to both
+ * elements. */
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_add_sd(__m128d __a, __m128d __b)
+{
+  __a[0] += __b[0];
+  return __a;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_add_pd(__m128d __a, __m128d __b)
+{
+  return __a + __b;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_sub_sd(__m128d __a, __m128d __b)
+{
+  __a[0] -= __b[0];
+  return __a;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_sub_pd(__m128d __a, __m128d __b)
+{
+  return __a - __b;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_mul_sd(__m128d __a, __m128d __b)
+{
+  __a[0] *= __b[0];
+  return __a;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_mul_pd(__m128d __a, __m128d __b)
+{
+  return __a * __b;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_div_sd(__m128d __a, __m128d __b)
+{
+  __a[0] /= __b[0];
+  return __a;
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_div_pd(__m128d __a, __m128d __b)
+{
+  return __a / __b;
+}
+/* Element 0 of the result comes from the sqrtsd builtin applied to __b;
+ * element 1 is copied from __a. */
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_sqrt_sd(__m128d __a, __m128d __b)
+{
+  __m128d __c = __builtin_ia32_sqrtsd(__b);
+  return (__m128d) { __c[0], __a[1] };
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_sqrt_pd(__m128d __a)
+{
+  return __builtin_ia32_sqrtpd(__a);
+}
+/* Minimum / maximum: direct calls to the matching min/max builtin. */
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_min_sd(__m128d __a, __m128d __b)
+{
+  return __builtin_ia32_minsd(__a, __b);
+}
+
+static __inline__ __m128d 
__attribute__((__always_inline__, __nodebug__)) +_mm_min_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_minpd(__a, __b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_max_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_maxsd(__a, __b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_max_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_maxpd(__a, __b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_and_pd(__m128d __a, __m128d __b) +{ + return (__m128d)((__v4si)__a & (__v4si)__b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_andnot_pd(__m128d __a, __m128d __b) +{ + return (__m128d)(~(__v4si)__a & (__v4si)__b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_or_pd(__m128d __a, __m128d __b) +{ + return (__m128d)((__v4si)__a | (__v4si)__b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_xor_pd(__m128d __a, __m128d __b) +{ + return (__m128d)((__v4si)__a ^ (__v4si)__b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__a, __b, 0); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__a, __b, 1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__a, __b, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__b, __a, 1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__b, 
__a, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpord_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__a, __b, 7); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpunord_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__a, __b, 3); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__a, __b, 4); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnlt_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__a, __b, 5); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnle_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__a, __b, 6); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpngt_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__b, __a, 5); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnge_pd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmppd(__b, __a, 6); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1); + return (__m128d) { __c[0], __a[1] }; +} + 
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2); + return (__m128d) { __c[0], __a[1] }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpord_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpunord_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnlt_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnle_sd(__m128d __a, __m128d __b) +{ + return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpngt_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5); + return (__m128d) { __c[0], __a[1] }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnge_sd(__m128d __a, __m128d __b) +{ + __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6); + return (__m128d) { __c[0], __a[1] }; +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comieq_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdeq(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comilt_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdlt(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comile_sd(__m128d __a, __m128d __b) +{ + return 
__builtin_ia32_comisdle(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comigt_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdgt(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comige_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdge(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comineq_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_comisdneq(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomieq_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdeq(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomilt_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdlt(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomile_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdle(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomigt_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdgt(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomige_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdge(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomineq_sd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_ucomisdneq(__a, __b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpd_ps(__m128d __a) +{ + return __builtin_ia32_cvtpd2ps(__a); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtps_pd(__m128 __a) +{ + return __builtin_ia32_cvtps2pd(__a); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi32_pd(__m128i __a) +{ + return __builtin_ia32_cvtdq2pd((__v4si)__a); +} + +static 
__inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpd_epi32(__m128d __a) +{ + return __builtin_ia32_cvtpd2dq(__a); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsd_si32(__m128d __a) +{ + return __builtin_ia32_cvtsd2si(__a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsd_ss(__m128 __a, __m128d __b) +{ + __a[0] = __b[0]; + return __a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi32_sd(__m128d __a, int __b) +{ + __a[0] = __b; + return __a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtss_sd(__m128d __a, __m128 __b) +{ + __a[0] = __b[0]; + return __a; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvttpd_epi32(__m128d __a) +{ + return (__m128i)__builtin_ia32_cvttpd2dq(__a); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvttsd_si32(__m128d __a) +{ + return __a[0]; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpd_pi32(__m128d __a) +{ + return (__m64)__builtin_ia32_cvtpd2pi(__a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvttpd_pi32(__m128d __a) +{ + return (__m64)__builtin_ia32_cvttpd2pi(__a); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpi32_pd(__m64 __a) +{ + return __builtin_ia32_cvtpi2pd((__v2si)__a); +} + +static __inline__ double __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsd_f64(__m128d __a) +{ + return __a[0]; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_load_pd(double const *__dp) +{ + return *(__m128d*)__dp; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_load1_pd(double const *__dp) +{ + struct __mm_load1_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + double __u 
= ((struct __mm_load1_pd_struct*)__dp)->__u; + return (__m128d){ __u, __u }; +} + +#define _mm_load_pd1(dp) _mm_load1_pd(dp) + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loadr_pd(double const *__dp) +{ + __m128d __u = *(__m128d*)__dp; + return __builtin_shufflevector(__u, __u, 1, 0); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loadu_pd(double const *__dp) +{ + struct __loadu_pd { + __m128d __v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_pd*)__dp)->__v; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_load_sd(double const *__dp) +{ + struct __mm_load_sd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + double __u = ((struct __mm_load_sd_struct*)__dp)->__u; + return (__m128d){ __u, 0 }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loadh_pd(__m128d __a, double const *__dp) +{ + struct __mm_loadh_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u; + return (__m128d){ __a[0], __u }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_loadl_pd(__m128d __a, double const *__dp) +{ + struct __mm_loadl_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u; + return (__m128d){ __u, __a[1] }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_set_sd(double __w) +{ + return (__m128d){ __w, 0 }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_set1_pd(double __w) +{ + return (__m128d){ __w, __w }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_set_pd(double __w, double __x) +{ + return (__m128d){ __x, __w }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 
+_mm_setr_pd(double __w, double __x) +{ + return (__m128d){ __w, __x }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_setzero_pd(void) +{ + return (__m128d){ 0, 0 }; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_move_sd(__m128d __a, __m128d __b) +{ + return (__m128d){ __b[0], __a[1] }; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_sd(double *__dp, __m128d __a) +{ + struct __mm_store_sd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store1_pd(double *__dp, __m128d __a) +{ + struct __mm_store1_pd_struct { + double __u[2]; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; + ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_pd(double *__dp, __m128d __a) +{ + *(__m128d *)__dp = __a; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storeu_pd(double *__dp, __m128d __a) +{ + __builtin_ia32_storeupd(__dp, __a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storer_pd(double *__dp, __m128d __a) +{ + __a = __builtin_shufflevector(__a, __a, 1, 0); + *(__m128d *)__dp = __a; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storeh_pd(double *__dp, __m128d __a) +{ + struct __mm_storeh_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storel_pd(double *__dp, __m128d __a) +{ + struct __mm_storeh_pd_struct { + double __u; + } __attribute__((__packed__, __may_alias__)); + ((struct 
__mm_storeh_pd_struct*)__dp)->__u = __a[0]; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_add_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)((__v16qi)__a + (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_add_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a + (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_add_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)((__v4si)__a + (__v4si)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_add_si64(__m64 __a, __m64 __b) +{ + return __a + __b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_add_epi64(__m128i __a, __m128i __b) +{ + return __a + __b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_adds_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_adds_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_adds_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_adds_epu16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_avg_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_avg_epu16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, 
(__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_madd_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mulhi_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mulhi_epu16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mullo_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a * (__v8hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mul_su32(__m64 __a, __m64 __b) +{ + return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mul_epu32(__m128i __a, __m128i __b) +{ + return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 
+_mm_sad_epu8(__m128i __a, __m128i __b) +{ + return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sub_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)((__v16qi)__a - (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sub_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a - (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sub_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)((__v4si)__a - (__v4si)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_si64(__m64 __a, __m64 __b) +{ + return __a - __b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sub_epi64(__m128i __a, __m128i __b) +{ + return __a - __b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_subs_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_subs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_subs_epu8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_subs_epu16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_and_si128(__m128i __a, __m128i __b) +{ + return __a & __b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_andnot_si128(__m128i __a, __m128i __b) +{ + return ~__a & __b; +} + +static 
__inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_or_si128(__m128i __a, __m128i __b) +{ + return __a | __b; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_xor_si128(__m128i __a, __m128i __b) +{ + return __a ^ __b; +} + +#define _mm_slli_si128(a, count) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ + __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ + (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_slli_epi16(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sll_epi16(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_slli_epi32(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sll_epi32(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_slli_epi64(__m128i __a, int __count) +{ + return __builtin_ia32_psllqi128(__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sll_epi64(__m128i __a, __m128i __count) +{ + return __builtin_ia32_psllq128(__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srai_epi16(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sra_epi16(__m128i __a, __m128i __count) +{ + return 
(__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srai_epi32(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sra_epi32(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); +} + + +#define _mm_srli_si128(a, count) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ + __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ + (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srli_epi16(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srl_epi16(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srli_epi32(__m128i __a, int __count) +{ + return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srl_epi32(__m128i __a, __m128i __count) +{ + return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srli_epi64(__m128i __a, int __count) +{ + return __builtin_ia32_psrlqi128(__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_srl_epi64(__m128i __a, __m128i __count) +{ + return __builtin_ia32_psrlq128(__a, __count); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi8(__m128i __a, __m128i __b) +{ + return 
(__m128i)((__v16qi)__a == (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a == (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)((__v4si)__a == (__v4si)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi8(__m128i __a, __m128i __b) +{ + /* This function always performs a signed comparison, but __v16qi is a char + which may be signed or unsigned. */ + typedef signed char __v16qs __attribute__((__vector_size__(16))); + return (__m128i)((__v16qs)__a > (__v16qs)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)((__v8hi)__a > (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)((__v4si)__a > (__v4si)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi8(__m128i __a, __m128i __b) +{ + return _mm_cmpgt_epi8(__b, __a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi16(__m128i __a, __m128i __b) +{ + return _mm_cmpgt_epi16(__b, __a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_epi32(__m128i __a, __m128i __b) +{ + return _mm_cmpgt_epi32(__b, __a); +} + +#ifdef __x86_64__ +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_sd(__m128d __a, long long __b) +{ + __a[0] = __b; + return __a; +} + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsd_si64(__m128d __a) +{ + return __builtin_ia32_cvtsd2si64(__a); +} + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) 
+_mm_cvttsd_si64(__m128d __a) +{ + return __a[0]; +} +#endif + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi32_ps(__m128i __a) +{ + return __builtin_ia32_cvtdq2ps((__v4si)__a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtps_epi32(__m128 __a) +{ + return (__m128i)__builtin_ia32_cvtps2dq(__a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvttps_epi32(__m128 __a) +{ + return (__m128i)__builtin_ia32_cvttps2dq(__a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi32_si128(int __a) +{ + return (__m128i)(__v4si){ __a, 0, 0, 0 }; +} + +#ifdef __x86_64__ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_si128(long long __a) +{ + return (__m128i){ __a, 0 }; +} +#endif + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi128_si32(__m128i __a) +{ + __v4si __b = (__v4si)__a; + return __b[0]; +} + +#ifdef __x86_64__ +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi128_si64(__m128i __a) +{ + return __a[0]; +} +#endif + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_load_si128(__m128i const *__p) +{ + return *__p; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_loadu_si128(__m128i const *__p) +{ + struct __loadu_si128 { + __m128i __v; + } __attribute__((packed, may_alias)); + return ((struct __loadu_si128*)__p)->__v; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_loadl_epi64(__m128i const *__p) +{ + struct __mm_loadl_epi64_struct { + long long __u; + } __attribute__((__packed__, __may_alias__)); + return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0}; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi64x(long long q1, long long q0) +{ + return (__m128i){ 
q0, q1 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi64(__m64 q1, __m64 q0) +{ + return (__m128i){ (long long)q0, (long long)q1 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi32(int i3, int i2, int i1, int i0) +{ + return (__m128i)(__v4si){ i0, i1, i2, i3}; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0) +{ + return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0) +{ + return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi64x(long long __q) +{ + return (__m128i){ __q, __q }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi64(__m64 __q) +{ + return (__m128i){ (long long)__q, (long long)__q }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi32(int __i) +{ + return (__m128i)(__v4si){ __i, __i, __i, __i }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi16(short __w) +{ + return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_set1_epi8(char __b) +{ + return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setr_epi64(__m64 q0, __m64 q1) +{ + return (__m128i){ (long long)q0, (long long)q1 }; +} + +static 
__inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setr_epi32(int i0, int i1, int i2, int i3) +{ + return (__m128i)(__v4si){ i0, i1, i2, i3}; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7) +{ + return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15) +{ + return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_setzero_si128(void) +{ + return (__m128i){ 0LL, 0LL }; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_si128(__m128i *__p, __m128i __b) +{ + *__p = __b; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storeu_si128(__m128i *__p, __m128i __b) +{ + __builtin_ia32_storedqu((char *)__p, (__v16qi)__b); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) +{ + __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storel_epi64(__m128i *__p, __m128i __a) +{ + struct __mm_storel_epi64_struct { + long long __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_pd(double *__p, __m128d __a) +{ + __builtin_ia32_movntpd(__p, __a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_si128(__m128i *__p, __m128i __a) +{ + 
__builtin_ia32_movntdq(__p, __a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_si32(int *__p, int __a) +{ + __builtin_ia32_movnti(__p, __a); +} + +#ifdef __x86_64__ +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_si64(long long *__p, long long __a) +{ + __builtin_ia32_movnti64(__p, __a); +} +#endif + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_clflush(void const *__p) +{ + __builtin_ia32_clflush(__p); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_lfence(void) +{ + __builtin_ia32_lfence(); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_mfence(void) +{ + __builtin_ia32_mfence(); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_packs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_packs_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_packus_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_extract_epi16(__m128i __a, int __imm) +{ + __v8hi __b = (__v8hi)__a; + return (unsigned short)__b[__imm & 7]; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_insert_epi16(__m128i __a, int __b, int __imm) +{ + __v8hi __c = (__v8hi)__a; + __c[__imm & 7] = __b; + return (__m128i)__c; +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_movemask_epi8(__m128i __a) +{ + return __builtin_ia32_pmovmskb128((__v16qi)__a); +} + +#define _mm_shuffle_epi32(a, imm) __extension__ ({ \ + 
_Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ + __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ + (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \ + (imm) & 0x3, ((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) + +#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ + __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ + (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ + (imm) & 0x3, ((imm) & 0xc) >> 2, \ + ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ + 4, 5, 6, 7); }) + +#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ + _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ + __m128i __a = (a); \ + _Pragma("clang diagnostic pop"); \ + (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ + 0, 1, 2, 3, \ + 4 + (((imm) & 0x03) >> 0), \ + 4 + (((imm) & 0x0c) >> 2), \ + 4 + (((imm) & 0x30) >> 4), \ + 4 + (((imm) & 0xc0) >> 6)); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_epi64(__m128i __a, __m128i __b) +{ + return 
(__m128i)__builtin_shufflevector(__a, __b, 1, 2+1); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_epi64(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_movepi64_pi64(__m128i __a) +{ + return (__m64)__a[0]; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_movpi64_epi64(__m64 __a) +{ + return (__m128i){ (long long)__a, 0 }; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_move_epi64(__m128i __a) +{ + return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_pd(__m128d __a, __m128d __b) +{ + return __builtin_shufflevector(__a, __b, 1, 2+1); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_pd(__m128d __a, __m128d __b) +{ + return __builtin_shufflevector(__a, __b, 0, 2+0); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_movemask_pd(__m128d __a) +{ + return __builtin_ia32_movmskpd(__a); +} + +#define _mm_shuffle_pd(a, b, i) __extension__ ({ \ + _Pragma("clang 
diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ + __m128d __a = (a); \ + __m128d __b = (b); \ + _Pragma("clang diagnostic pop"); \ + __builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); }) + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_castpd_ps(__m128d __a) +{ + return (__m128)__a; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_castpd_si128(__m128d __a) +{ + return (__m128i)__a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_castps_pd(__m128 __a) +{ + return (__m128d)__a; +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_castps_si128(__m128 __a) +{ + return (__m128i)__a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_castsi128_ps(__m128i __a) +{ + return (__m128)__a; +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_castsi128_pd(__m128i __a) +{ + return (__m128d)__a; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_pause(void) +{ + __asm__ volatile ("pause"); +} + +#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) + +#endif /* __SSE2__ */ + +#endif /* __EMMINTRIN_H */ diff --git a/python/clang/5.1/include/f16cintrin.h b/python/clang/5.1/include/f16cintrin.h new file mode 100644 index 00000000..f3614c0e --- /dev/null +++ b/python/clang/5.1/include/f16cintrin.h @@ -0,0 +1,58 @@ +/*===---- f16cintrin.h - F16C intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __F16C__ +# error "F16C instruction is not enabled" +#endif /* __F16C__ */ + +#ifndef __F16CINTRIN_H +#define __F16CINTRIN_H + +typedef float __v8sf __attribute__ ((__vector_size__ (32))); +typedef float __m256 __attribute__ ((__vector_size__ (32))); + +#define _mm_cvtps_ph(a, imm) __extension__ ({ \ + __m128 __a = (a); \ + (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__a, (imm)); }) + +#define _mm256_cvtps_ph(a, imm) __extension__ ({ \ + __m256 __a = (a); \ + (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__a, (imm)); }) + +static __inline __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtph_ps(__m128i __a) +{ + return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); +} + +static __inline __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_cvtph_ps(__m128i __a) +{ + return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); +} + +#endif /* __F16CINTRIN_H */ diff --git a/python/clang/5.1/include/float.h b/python/clang/5.1/include/float.h new file mode 100644 index 00000000..2cb13d3e --- /dev/null +++ b/python/clang/5.1/include/float.h @@ -0,0 +1,124 @@ +/*===---- float.h - Characteristics of floating 
point types ----------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __FLOAT_H +#define __FLOAT_H + +/* If we're on MinGW, fall back to the system's float.h, which might have + * additional definitions provided for Windows. + * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx + */ +#if (defined(__MINGW32__) || defined(_MSC_VER)) && \ + defined(__has_include_next) && __has_include_next() +# include_next + +/* Undefine anything that we'll be redefining below. 
*/ +# undef FLT_EVAL_METHOD +# undef FLT_ROUNDS +# undef FLT_RADIX +# undef FLT_MANT_DIG +# undef DBL_MANT_DIG +# undef LDBL_MANT_DIG +# undef DECIMAL_DIG +# undef FLT_DIG +# undef DBL_DIG +# undef LDBL_DIG +# undef FLT_MIN_EXP +# undef DBL_MIN_EXP +# undef LDBL_MIN_EXP +# undef FLT_MIN_10_EXP +# undef DBL_MIN_10_EXP +# undef LDBL_MIN_10_EXP +# undef FLT_MAX_EXP +# undef DBL_MAX_EXP +# undef LDBL_MAX_EXP +# undef FLT_MAX_10_EXP +# undef DBL_MAX_10_EXP +# undef LDBL_MAX_10_EXP +# undef FLT_MAX +# undef DBL_MAX +# undef LDBL_MAX +# undef FLT_EPSILON +# undef DBL_EPSILON +# undef LDBL_EPSILON +# undef FLT_MIN +# undef DBL_MIN +# undef LDBL_MIN +# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) +# undef FLT_TRUE_MIN +# undef DBL_TRUE_MIN +# undef LDBL_TRUE_MIN +# endif +#endif + +/* Characteristics of floating point types, C99 5.2.4.2.2 */ + +#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ +#define FLT_ROUNDS (__builtin_flt_rounds()) +#define FLT_RADIX __FLT_RADIX__ + +#define FLT_MANT_DIG __FLT_MANT_DIG__ +#define DBL_MANT_DIG __DBL_MANT_DIG__ +#define LDBL_MANT_DIG __LDBL_MANT_DIG__ + +#define DECIMAL_DIG __DECIMAL_DIG__ + +#define FLT_DIG __FLT_DIG__ +#define DBL_DIG __DBL_DIG__ +#define LDBL_DIG __LDBL_DIG__ + +#define FLT_MIN_EXP __FLT_MIN_EXP__ +#define DBL_MIN_EXP __DBL_MIN_EXP__ +#define LDBL_MIN_EXP __LDBL_MIN_EXP__ + +#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__ +#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__ +#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__ + +#define FLT_MAX_EXP __FLT_MAX_EXP__ +#define DBL_MAX_EXP __DBL_MAX_EXP__ +#define LDBL_MAX_EXP __LDBL_MAX_EXP__ + +#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__ +#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__ +#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__ + +#define FLT_MAX __FLT_MAX__ +#define DBL_MAX __DBL_MAX__ +#define LDBL_MAX __LDBL_MAX__ + +#define FLT_EPSILON __FLT_EPSILON__ +#define DBL_EPSILON __DBL_EPSILON__ +#define LDBL_EPSILON __LDBL_EPSILON__ + +#define FLT_MIN __FLT_MIN__ +#define DBL_MIN __DBL_MIN__ 
+#define LDBL_MIN __LDBL_MIN__ + +#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) +# define FLT_TRUE_MIN __FLT_DENORM_MIN__ +# define DBL_TRUE_MIN __DBL_DENORM_MIN__ +# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ +#endif + +#endif /* __FLOAT_H */ diff --git a/python/clang/5.1/include/fma4intrin.h b/python/clang/5.1/include/fma4intrin.h new file mode 100644 index 00000000..c30920df --- /dev/null +++ b/python/clang/5.1/include/fma4intrin.h @@ -0,0 +1,231 @@ +/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __FMA4INTRIN_H +#define __FMA4INTRIN_H + +#ifndef __FMA4__ +# error "FMA4 instruction set is not enabled" +#else + +#include + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return 
(__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); +} + 
+static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); +} + +static 
__inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); +} + +#endif /* __FMA4__ */ + +#endif /* __FMA4INTRIN_H */ diff --git a/python/clang/5.1/include/fmaintrin.h b/python/clang/5.1/include/fmaintrin.h new file mode 100644 index 00000000..6bfd5a88 --- /dev/null +++ b/python/clang/5.1/include/fmaintrin.h @@ -0,0 +1,229 @@ +/*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead."
+#endif + +#ifndef __FMAINTRIN_H +#define __FMAINTRIN_H + +#ifndef __FMA__ +# error "FMA instruction set is not enabled" +#else + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return 
(__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, 
__C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, 
__C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); +} + +#endif /* __FMA__ */ + +#endif /* __FMAINTRIN_H */ diff --git a/python/clang/5.1/include/immintrin.h b/python/clang/5.1/include/immintrin.h new file mode 100644 index 00000000..15d6e05f --- /dev/null +++ b/python/clang/5.1/include/immintrin.h @@ -0,0 +1,118 @@ +/*===---- immintrin.h - Intel intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#define __IMMINTRIN_H + +#ifdef __MMX__ +#include +#endif + +#ifdef __SSE__ +#include +#endif + +#ifdef __SSE2__ +#include +#endif + +#ifdef __SSE3__ +#include +#endif + +#ifdef __SSSE3__ +#include +#endif + +#if defined (__SSE4_2__) || defined (__SSE4_1__) +#include +#endif + +#if defined (__AES__) +#include +#endif + +#ifdef __AVX__ +#include +#endif + +#ifdef __AVX2__ +#include +#endif + +#ifdef __BMI__ +#include +#endif + +#ifdef __BMI2__ +#include +#endif + +#ifdef __LZCNT__ +#include +#endif + +#ifdef __FMA__ +#include +#endif + +#ifdef __RDRND__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand16_step(unsigned short *__p) +{ + return __builtin_ia32_rdrand16_step(__p); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand32_step(unsigned int *__p) +{ + return __builtin_ia32_rdrand32_step(__p); +} + +#ifdef __x86_64__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand64_step(unsigned long long *__p) +{ + return __builtin_ia32_rdrand64_step(__p); +} +#endif +#endif /* __RDRND__ */ + +#ifdef __RTM__ +#include +#endif + +/* FIXME: check __HLE__ as well when HLE is supported. 
*/ +#if defined (__RTM__) +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_xtest(void) +{ + return __builtin_ia32_xtest(); +} +#endif + +#ifdef __SHA__ +#include +#endif + +#endif /* __IMMINTRIN_H */ diff --git a/python/clang/5.1/include/iso646.h b/python/clang/5.1/include/iso646.h new file mode 100644 index 00000000..dca13c5b --- /dev/null +++ b/python/clang/5.1/include/iso646.h @@ -0,0 +1,43 @@ +/*===---- iso646.h - Standard header for alternate spellings of operators---=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __ISO646_H +#define __ISO646_H + +#ifndef __cplusplus +#define and && +#define and_eq &= +#define bitand & +#define bitor | +#define compl ~ +#define not ! 
+#define not_eq != +#define or || +#define or_eq |= +#define xor ^ +#define xor_eq ^= +#endif + +#endif /* __ISO646_H */ diff --git a/python/clang/5.1/include/limits.h b/python/clang/5.1/include/limits.h new file mode 100644 index 00000000..91bd4046 --- /dev/null +++ b/python/clang/5.1/include/limits.h @@ -0,0 +1,119 @@ +/*===---- limits.h - Standard header for integer sizes --------------------===*\ + * + * Copyright (c) 2009 Chris Lattner + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __CLANG_LIMITS_H +#define __CLANG_LIMITS_H + +/* The system's limits.h may, in turn, try to #include_next GCC's limits.h. + Avert this #include_next madness. */ +#if defined __GNUC__ && !defined _GCC_LIMITS_H_ +#define _GCC_LIMITS_H_ +#endif + +/* System headers include a number of constants from POSIX in . + Include it if we're hosted. 
*/ +#if __STDC_HOSTED__ && \ + defined(__has_include_next) && __has_include_next() +#include_next +#endif + +/* Many system headers try to "help us out" by defining these. No really, we + know how big each datatype is. */ +#undef SCHAR_MIN +#undef SCHAR_MAX +#undef UCHAR_MAX +#undef SHRT_MIN +#undef SHRT_MAX +#undef USHRT_MAX +#undef INT_MIN +#undef INT_MAX +#undef UINT_MAX +#undef LONG_MIN +#undef LONG_MAX +#undef ULONG_MAX + +#undef CHAR_BIT +#undef CHAR_MIN +#undef CHAR_MAX + +/* C90/99 5.2.4.2.1 */ +#define SCHAR_MAX __SCHAR_MAX__ +#define SHRT_MAX __SHRT_MAX__ +#define INT_MAX __INT_MAX__ +#define LONG_MAX __LONG_MAX__ + +#define SCHAR_MIN (-__SCHAR_MAX__-1) +#define SHRT_MIN (-__SHRT_MAX__ -1) +#define INT_MIN (-__INT_MAX__ -1) +#define LONG_MIN (-__LONG_MAX__ -1L) + +#define UCHAR_MAX (__SCHAR_MAX__*2 +1) +#define USHRT_MAX (__SHRT_MAX__ *2 +1) +#define UINT_MAX (__INT_MAX__ *2U +1U) +#define ULONG_MAX (__LONG_MAX__ *2UL+1UL) + +#ifndef MB_LEN_MAX +#define MB_LEN_MAX 1 +#endif + +#define CHAR_BIT __CHAR_BIT__ + +#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */ +#define CHAR_MIN 0 +#define CHAR_MAX UCHAR_MAX +#else +#define CHAR_MIN SCHAR_MIN +#define CHAR_MAX __SCHAR_MAX__ +#endif + +/* C99 5.2.4.2.1: Added long long. + C++11 18.3.3.2: same contents as the Standard C Library header . + */ +#if __STDC_VERSION__ >= 199901 || __cplusplus >= 201103L + +#undef LLONG_MIN +#undef LLONG_MAX +#undef ULLONG_MAX + +#define LLONG_MAX __LONG_LONG_MAX__ +#define LLONG_MIN (-__LONG_LONG_MAX__-1LL) +#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL) +#endif + +/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad + that we don't have something like #pragma poison that could be used to + deprecate a macro - the code should just use LLONG_MAX and friends. + */ +#if defined(__GNU_LIBRARY__) ? 
defined(__USE_GNU) : !defined(__STRICT_ANSI__) + +#undef LONG_LONG_MIN +#undef LONG_LONG_MAX +#undef ULONG_LONG_MAX + +#define LONG_LONG_MAX __LONG_LONG_MAX__ +#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL) +#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL) +#endif + +#endif /* __CLANG_LIMITS_H */ diff --git a/python/clang/5.1/include/lzcntintrin.h b/python/clang/5.1/include/lzcntintrin.h new file mode 100644 index 00000000..62ab5ca2 --- /dev/null +++ b/python/clang/5.1/include/lzcntintrin.h @@ -0,0 +1,55 @@ +/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __LZCNT__ +# error "LZCNT instruction is not enabled" +#endif /* __LZCNT__ */ + +#ifndef __LZCNTINTRIN_H +#define __LZCNTINTRIN_H + +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +__lzcnt16(unsigned short __X) +{ + return __builtin_clzs(__X); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__lzcnt32(unsigned int __X) +{ + return __builtin_clz(__X); +} + +#ifdef __x86_64__ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__lzcnt64(unsigned long long __X) +{ + return __builtin_clzll(__X); +} +#endif + +#endif /* __LZCNTINTRIN_H */ diff --git a/python/clang/5.1/include/mm3dnow.h b/python/clang/5.1/include/mm3dnow.h new file mode 100644 index 00000000..5242d99c --- /dev/null +++ b/python/clang/5.1/include/mm3dnow.h @@ -0,0 +1,162 @@ +/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _MM3DNOW_H_INCLUDED +#define _MM3DNOW_H_INCLUDED + +#include +#include + +typedef float __v2sf __attribute__((__vector_size__(8))); + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_m_femms() { + __builtin_ia32_femms(); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pavgusb(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pf2id(__m64 __m) { + return (__m64)__builtin_ia32_pf2id((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfadd(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfcmpeq(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfcmpge(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfcmpgt(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfmax(__m64 __m1, __m64 __m2) { + return 
(__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfmin(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfmul(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrcp(__m64 __m) { + return (__m64)__builtin_ia32_pfrcp((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrcpit1(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrcpit2(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrsqrt(__m64 __m) { + return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfrsqrtit1(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfsub(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfsubr(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pi2fd(__m64 __m) { + return (__m64)__builtin_ia32_pi2fd((__v2si)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pmulhrw(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2); +} + +static 
__inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pf2iw(__m64 __m) { + return (__m64)__builtin_ia32_pf2iw((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfnacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pfpnacc(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pi2fw(__m64 __m) { + return (__m64)__builtin_ia32_pi2fw((__v2si)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pswapdsf(__m64 __m) { + return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_m_pswapdsi(__m64 __m) { + return (__m64)__builtin_ia32_pswapdsi((__v2si)__m); +} + +#endif diff --git a/python/clang/5.1/include/mm_malloc.h b/python/clang/5.1/include/mm_malloc.h new file mode 100644 index 00000000..305afd31 --- /dev/null +++ b/python/clang/5.1/include/mm_malloc.h @@ -0,0 +1,75 @@ +/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __MM_MALLOC_H +#define __MM_MALLOC_H + +#include + +#ifdef _WIN32 +#include +#else +#ifndef __cplusplus +extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); +#else +// Some systems (e.g. those with GNU libc) declare posix_memalign with an +// exception specifier. Via an "egregious workaround" in +// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid +// redeclaration of glibc's declaration. 
+extern "C" int posix_memalign(void **__memptr, size_t __alignment, size_t __size); +#endif +#endif + +#if !(defined(_WIN32) && defined(_mm_malloc)) +static __inline__ void *__attribute__((__always_inline__, __nodebug__, + __malloc__)) +_mm_malloc(size_t __size, size_t __align) +{ + if (__align == 1) { + return malloc(__size); + } + + if (!(__align & (__align - 1)) && __align < sizeof(void *)) + __align = sizeof(void *); + + void *__mallocedMemory; +#if defined(__MINGW32__) + __mallocedMemory = __mingw_aligned_malloc(__size, __align); +#elif defined(_WIN32) + __mallocedMemory = _aligned_malloc(__size, __align); +#else + if (posix_memalign(&__mallocedMemory, __align, __size)) + return 0; +#endif + + return __mallocedMemory; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_free(void *__p) +{ + free(__p); +} +#endif + +#endif /* __MM_MALLOC_H */ diff --git a/python/clang/5.1/include/mmintrin.h b/python/clang/5.1/include/mmintrin.h new file mode 100644 index 00000000..986870a9 --- /dev/null +++ b/python/clang/5.1/include/mmintrin.h @@ -0,0 +1,503 @@ +/*===---- mmintrin.h - MMX intrinsics --------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __MMINTRIN_H +#define __MMINTRIN_H + +#ifndef __MMX__ +#error "MMX instruction set not enabled" +#else + +typedef long long __m64 __attribute__((__vector_size__(8))); + +typedef int __v2si __attribute__((__vector_size__(8))); +typedef short __v4hi __attribute__((__vector_size__(8))); +typedef char __v8qi __attribute__((__vector_size__(8))); + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_empty(void) +{ + __builtin_ia32_emms(); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi32_si64(int __i) +{ + return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_si32(__m64 __m) +{ + return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_m64(long long __i) +{ + return (__m64)__i; +} + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvtm64_si64(__m64 __m) +{ + return (long long)__m; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_packs_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_packs_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_packs_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, 
(__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_add_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_add_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_add_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_adds_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 
+_mm_adds_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_adds_pu8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_adds_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_subs_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_subs_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_subs_pu8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_subs_pu16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_madd_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ 
__m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mulhi_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mullo_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sll_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_slli_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sll_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_slli_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sll_si64(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psllq(__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_slli_si64(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psllqi(__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sra_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srai_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sra_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); +} + +static __inline__ __m64 
__attribute__((__always_inline__, __nodebug__)) +_mm_srai_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srl_pi16(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srli_pi16(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srl_pi32(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srli_pi32(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srl_si64(__m64 __m, __m64 __count) +{ + return (__m64)__builtin_ia32_psrlq(__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_srli_si64(__m64 __m, int __count) +{ + return (__m64)__builtin_ia32_psrlqi(__m, __count); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_and_si64(__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pand(__m1, __m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_andnot_si64(__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pandn(__m1, __m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_or_si64(__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_por(__m1, __m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_xor_si64(__m64 __m1, __m64 __m2) +{ + return __builtin_ia32_pxor(__m1, __m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) +{ + return 
(__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) +{ + return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_setzero_si64(void) +{ + return (__m64){ 0LL }; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set_pi32(int __i1, int __i0) +{ + return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) +{ + return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, + char __b1, char __b0) +{ + return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, + __b4, __b5, __b6, __b7); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set1_pi32(int __i) +{ + return _mm_set_pi32(__i, __i); +} + +static __inline__ __m64 
__attribute__((__always_inline__, __nodebug__)) +_mm_set1_pi16(short __w) +{ + return _mm_set_pi16(__w, __w, __w, __w); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_set1_pi8(char __b) +{ + return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_setr_pi32(int __i0, int __i1) +{ + return _mm_set_pi32(__i1, __i0); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) +{ + return _mm_set_pi16(__w3, __w2, __w1, __w0); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, + char __b6, char __b7) +{ + return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); +} + + +/* Aliases for compatibility. */ +#define _m_empty _mm_empty +#define _m_from_int _mm_cvtsi32_si64 +#define _m_to_int _mm_cvtsi64_si32 +#define _m_packsswb _mm_packs_pi16 +#define _m_packssdw _mm_packs_pi32 +#define _m_packuswb _mm_packs_pu16 +#define _m_punpckhbw _mm_unpackhi_pi8 +#define _m_punpckhwd _mm_unpackhi_pi16 +#define _m_punpckhdq _mm_unpackhi_pi32 +#define _m_punpcklbw _mm_unpacklo_pi8 +#define _m_punpcklwd _mm_unpacklo_pi16 +#define _m_punpckldq _mm_unpacklo_pi32 +#define _m_paddb _mm_add_pi8 +#define _m_paddw _mm_add_pi16 +#define _m_paddd _mm_add_pi32 +#define _m_paddsb _mm_adds_pi8 +#define _m_paddsw _mm_adds_pi16 +#define _m_paddusb _mm_adds_pu8 +#define _m_paddusw _mm_adds_pu16 +#define _m_psubb _mm_sub_pi8 +#define _m_psubw _mm_sub_pi16 +#define _m_psubd _mm_sub_pi32 +#define _m_psubsb _mm_subs_pi8 +#define _m_psubsw _mm_subs_pi16 +#define _m_psubusb _mm_subs_pu8 +#define _m_psubusw _mm_subs_pu16 +#define _m_pmaddwd _mm_madd_pi16 +#define _m_pmulhw _mm_mulhi_pi16 +#define _m_pmullw _mm_mullo_pi16 +#define _m_psllw _mm_sll_pi16 +#define _m_psllwi _mm_slli_pi16 +#define 
_m_pslld _mm_sll_pi32 +#define _m_pslldi _mm_slli_pi32 +#define _m_psllq _mm_sll_si64 +#define _m_psllqi _mm_slli_si64 +#define _m_psraw _mm_sra_pi16 +#define _m_psrawi _mm_srai_pi16 +#define _m_psrad _mm_sra_pi32 +#define _m_psradi _mm_srai_pi32 +#define _m_psrlw _mm_srl_pi16 +#define _m_psrlwi _mm_srli_pi16 +#define _m_psrld _mm_srl_pi32 +#define _m_psrldi _mm_srli_pi32 +#define _m_psrlq _mm_srl_si64 +#define _m_psrlqi _mm_srli_si64 +#define _m_pand _mm_and_si64 +#define _m_pandn _mm_andnot_si64 +#define _m_por _mm_or_si64 +#define _m_pxor _mm_xor_si64 +#define _m_pcmpeqb _mm_cmpeq_pi8 +#define _m_pcmpeqw _mm_cmpeq_pi16 +#define _m_pcmpeqd _mm_cmpeq_pi32 +#define _m_pcmpgtb _mm_cmpgt_pi8 +#define _m_pcmpgtw _mm_cmpgt_pi16 +#define _m_pcmpgtd _mm_cmpgt_pi32 + +#endif /* __MMX__ */ + +#endif /* __MMINTRIN_H */ + diff --git a/python/clang/5.1/include/module.map b/python/clang/5.1/include/module.map new file mode 100644 index 00000000..09bee9c8 --- /dev/null +++ b/python/clang/5.1/include/module.map @@ -0,0 +1,165 @@ +module _Builtin_intrinsics [system] { + explicit module altivec { + requires altivec + header "altivec.h" + } + + explicit module arm { + requires arm + + explicit module neon { + requires neon + header "arm_neon.h" + export * + } + } + + explicit module arm64 { + requires arm64 + + explicit module simd { + header "aarch64_simd.h" + export * + } + } + + explicit module intel { + requires x86 + export * + + header "immintrin.h" + header "x86intrin.h" + + explicit module mm_malloc { + header "mm_malloc.h" + export * // note: for dependency + } + + explicit module cpuid { + requires x86 + header "cpuid.h" + } + + explicit module mmx { + requires mmx + header "mmintrin.h" + } + + explicit module f16c { + requires f16c + header "f16cintrin.h" + } + + explicit module sse { + requires sse + export mmx + export * // note: for hackish dependency + header "xmmintrin.h" + } + + explicit module sse2 { + requires sse2 + export sse + header "emmintrin.h" + } + + 
explicit module sse3 { + requires sse3 + export sse2 + header "pmmintrin.h" + } + + explicit module ssse3 { + requires ssse3 + export sse3 + header "tmmintrin.h" + } + + explicit module sse4_1 { + requires sse41 + export ssse3 + header "smmintrin.h" + } + + explicit module sse4_2 { + requires sse42 + export sse4_1 + header "nmmintrin.h" + } + + explicit module sse4a { + requires sse4a + export sse3 + header "ammintrin.h" + } + + explicit module avx { + requires avx + export sse4_2 + header "avxintrin.h" + } + + explicit module avx2 { + requires avx2 + export avx + header "avx2intrin.h" + } + + explicit module bmi { + requires bmi + header "bmiintrin.h" + } + + explicit module bmi2 { + requires bmi2 + header "bmi2intrin.h" + } + + explicit module fma { + requires fma + header "fmaintrin.h" + } + + explicit module fma4 { + requires fma4 + export sse3 + header "fma4intrin.h" + } + + explicit module lzcnt { + requires lzcnt + header "lzcntintrin.h" + } + + explicit module popcnt { + requires popcnt + header "popcntintrin.h" + } + + explicit module mm3dnow { + requires mm3dnow + header "mm3dnow.h" + } + + explicit module xop { + requires xop + export fma4 + header "xopintrin.h" + } + + explicit module aes_pclmul { + requires aes, pclmul + header "wmmintrin.h" + } + + explicit module aes { + requires aes + header "__wmmintrin_aes.h" + } + + explicit module pclmul { + requires pclmul + header "__wmmintrin_pclmul.h" + } + } +} diff --git a/python/clang/5.1/include/nmmintrin.h b/python/clang/5.1/include/nmmintrin.h new file mode 100644 index 00000000..f12622d7 --- /dev/null +++ b/python/clang/5.1/include/nmmintrin.h @@ -0,0 +1,35 @@ +/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _NMMINTRIN_H +#define _NMMINTRIN_H + +#ifndef __SSE4_2__ +#error "SSE4.2 instruction set not enabled" +#else + +/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h, + just include it now then. 
*/ +#include +#endif /* __SSE4_2__ */ +#endif /* _NMMINTRIN_H */ diff --git a/python/clang/5.1/include/pmmintrin.h b/python/clang/5.1/include/pmmintrin.h new file mode 100644 index 00000000..6f1fc329 --- /dev/null +++ b/python/clang/5.1/include/pmmintrin.h @@ -0,0 +1,117 @@ +/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __PMMINTRIN_H +#define __PMMINTRIN_H + +#ifndef __SSE3__ +#error "SSE3 instruction set not enabled" +#else + +#include + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_lddqu_si128(__m128i const *__p) +{ + return (__m128i)__builtin_ia32_lddqu((char const *)__p); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_addsub_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_addsubps(__a, __b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_haddps(__a, __b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_hsubps(__a, __b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_movehdup_ps(__m128 __a) +{ + return __builtin_shufflevector(__a, __a, 1, 1, 3, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_moveldup_ps(__m128 __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0, 2, 2); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_addsub_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_addsubpd(__a, __b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_haddpd(__a, __b); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_pd(__m128d __a, __m128d __b) +{ + return __builtin_ia32_hsubpd(__a, __b); +} + +#define _mm_loaddup_pd(dp) _mm_load1_pd(dp) + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_movedup_pd(__m128d __a) +{ + return __builtin_shufflevector(__a, __a, 0, 0); +} + +#define _MM_DENORMALS_ZERO_ON (0x0040) +#define _MM_DENORMALS_ZERO_OFF 
(0x0000) + +#define _MM_DENORMALS_ZERO_MASK (0x0040) + +#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) +#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) +{ + __builtin_ia32_monitor((void *)__p, __extensions, __hints); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_mwait(unsigned __extensions, unsigned __hints) +{ + __builtin_ia32_mwait(__extensions, __hints); +} + +#endif /* __SSE3__ */ + +#endif /* __PMMINTRIN_H */ diff --git a/python/clang/5.1/include/popcntintrin.h b/python/clang/5.1/include/popcntintrin.h new file mode 100644 index 00000000..d439daa8 --- /dev/null +++ b/python/clang/5.1/include/popcntintrin.h @@ -0,0 +1,45 @@ +/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __POPCNT__ +#error "POPCNT instruction set not enabled" +#endif + +#ifndef _POPCNTINTRIN_H +#define _POPCNTINTRIN_H + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_popcnt_u32(unsigned int __A) +{ + return __builtin_popcount(__A); +} + +#ifdef __x86_64__ +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_popcnt_u64(unsigned long long __A) +{ + return __builtin_popcountll(__A); +} +#endif /* __x86_64__ */ + +#endif /* _POPCNTINTRIN_H */ diff --git a/python/clang/5.1/include/prfchwintrin.h b/python/clang/5.1/include/prfchwintrin.h new file mode 100644 index 00000000..9825bd8c --- /dev/null +++ b/python/clang/5.1/include/prfchwintrin.h @@ -0,0 +1,39 @@ +/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED) +#error "Never use directly; include or instead." +#endif + +#ifndef __PRFCHWINTRIN_H +#define __PRFCHWINTRIN_H + +#if defined(__PRFCHW__) || defined(__3dNOW__) +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_m_prefetchw(void *__P) +{ + __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */); +} +#endif + +#endif /* __PRFCHWINTRIN_H */ diff --git a/python/clang/5.1/include/rdseedintrin.h b/python/clang/5.1/include/rdseedintrin.h new file mode 100644 index 00000000..0fef1fa4 --- /dev/null +++ b/python/clang/5.1/include/rdseedintrin.h @@ -0,0 +1,52 @@ +/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __RDSEEDINTRIN_H +#define __RDSEEDINTRIN_H + +#ifdef __RDSEED__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdseed16_step(unsigned short *__p) +{ + return __builtin_ia32_rdseed16_step(__p); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdseed32_step(unsigned int *__p) +{ + return __builtin_ia32_rdseed32_step(__p); +} + +#ifdef __x86_64__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdseed64_step(unsigned long long *__p) +{ + return __builtin_ia32_rdseed64_step(__p); +} +#endif +#endif /* __RDSEED__ */ +#endif /* __RDSEEDINTRIN_H */ diff --git a/python/clang/5.1/include/rtmintrin.h b/python/clang/5.1/include/rtmintrin.h new file mode 100644 index 00000000..26149ca8 --- /dev/null +++ b/python/clang/5.1/include/rtmintrin.h @@ -0,0 +1,54 @@ +/*===---- rtmintrin.h - RTM intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __RTMINTRIN_H +#define __RTMINTRIN_H + +#define _XBEGIN_STARTED (~0u) +#define _XABORT_EXPLICIT (1 << 0) +#define _XABORT_RETRY (1 << 1) +#define _XABORT_CONFLICT (1 << 2) +#define _XABORT_CAPACITY (1 << 3) +#define _XABORT_DEBUG (1 << 4) +#define _XABORT_NESTED (1 << 5) +#define _XABORT_CODE(x) (((x) >> 24) & 0xFF) + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_xbegin(void) +{ + return __builtin_ia32_xbegin(); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_xend(void) +{ + __builtin_ia32_xend(); +} + +#define _xabort(imm) __builtin_ia32_xabort((imm)) + +#endif /* __RTMINTRIN_H */ diff --git a/python/clang/5.1/include/sanitizer/asan_interface.h b/python/clang/5.1/include/sanitizer/asan_interface.h new file mode 100644 index 00000000..8adf3f17 --- /dev/null +++ b/python/clang/5.1/include/sanitizer/asan_interface.h @@ -0,0 +1,137 @@ +//===-- sanitizer/asan_interface.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer. +// +// Public interface header. 
+//===----------------------------------------------------------------------===// +#ifndef SANITIZER_ASAN_INTERFACE_H +#define SANITIZER_ASAN_INTERFACE_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + // Marks memory region [addr, addr+size) as unaddressable. + // This memory must be previously allocated by the user program. Accessing + // addresses in this region from instrumented code is forbidden until + // this region is unpoisoned. This function is not guaranteed to poison + // the whole region - it may poison only subregion of [addr, addr+size) due + // to ASan alignment restrictions. + // Method is NOT thread-safe in the sense that no two threads can + // (un)poison memory in the same memory region simultaneously. + void __asan_poison_memory_region(void const volatile *addr, size_t size); + // Marks memory region [addr, addr+size) as addressable. + // This memory must be previously allocated by the user program. Accessing + // addresses in this region is allowed until this region is poisoned again. + // This function may unpoison a superregion of [addr, addr+size) due to + // ASan alignment restrictions. + // Method is NOT thread-safe in the sense that no two threads can + // (un)poison memory in the same memory region simultaneously. + void __asan_unpoison_memory_region(void const volatile *addr, size_t size); + +// User code should use macros instead of functions. +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) +#define ASAN_POISON_MEMORY_REGION(addr, size) \ + __asan_poison_memory_region((addr), (size)) +#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \ + __asan_unpoison_memory_region((addr), (size)) +#else +#define ASAN_POISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) +#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) +#endif + + // Returns true iff addr is poisoned (i.e. 1-byte read/write access to this + // address will result in error report from AddressSanitizer). 
+ bool __asan_address_is_poisoned(void const volatile *addr); + + // If at least on byte in [beg, beg+size) is poisoned, return the address + // of the first such byte. Otherwise return 0. + void *__asan_region_is_poisoned(void *beg, size_t size); + + // Print the description of addr (useful when debugging in gdb). + void __asan_describe_address(void *addr); + + // This is an internal function that is called to report an error. + // However it is still a part of the interface because users may want to + // set a breakpoint on this function in a debugger. + void __asan_report_error(void *pc, void *bp, void *sp, + void *addr, bool is_write, size_t access_size); + + // Sets the exit code to use when reporting an error. + // Returns the old value. + int __asan_set_error_exit_code(int exit_code); + + // Sets the callback to be called right before death on error. + // Passing 0 will unset the callback. + void __asan_set_death_callback(void (*callback)(void)); + + void __asan_set_error_report_callback(void (*callback)(const char*)); + + // User may provide function that would be called right when ASan detects + // an error. This can be used to notice cases when ASan detects an error, but + // the program crashes before ASan report is printed. + void __asan_on_error(); + + // User may provide its own implementation for symbolization function. + // It should print the description of instruction at address "pc" to + // "out_buffer". Description should be at most "out_size" bytes long. + // User-specified function should return true if symbolization was + // successful. + bool __asan_symbolize(const void *pc, char *out_buffer, + int out_size); + + // Returns the estimated number of bytes that will be reserved by allocator + // for request of "size" bytes. If ASan allocator can't allocate that much + // memory, returns the maximal possible allocation size, otherwise returns + // "size". 
+ size_t __asan_get_estimated_allocated_size(size_t size); + // Returns true if p was returned by the ASan allocator and + // is not yet freed. + bool __asan_get_ownership(const void *p); + // Returns the number of bytes reserved for the pointer p. + // Requires (get_ownership(p) == true) or (p == 0). + size_t __asan_get_allocated_size(const void *p); + // Number of bytes, allocated and not yet freed by the application. + size_t __asan_get_current_allocated_bytes(); + // Number of bytes, mmaped by asan allocator to fulfill allocation requests. + // Generally, for request of X bytes, allocator can reserve and add to free + // lists a large number of chunks of size X to use them for future requests. + // All these chunks count toward the heap size. Currently, allocator never + // releases memory to OS (instead, it just puts freed chunks to free lists). + size_t __asan_get_heap_size(); + // Number of bytes, mmaped by asan allocator, which can be used to fulfill + // allocation requests. When a user program frees memory chunk, it can first + // fall into quarantine and will count toward __asan_get_free_bytes() later. + size_t __asan_get_free_bytes(); + // Number of bytes in unmapped pages, that are released to OS. Currently, + // always returns 0. + size_t __asan_get_unmapped_bytes(); + // Prints accumulated stats to stderr. Used for debugging. + void __asan_print_accumulated_stats(); + + // This function may be optionally provided by user and should return + // a string containing ASan runtime options. See asan_flags.h for details. + const char* __asan_default_options(); + + // Malloc hooks that may be optionally provided by user. + // __asan_malloc_hook(ptr, size) is called immediately after + // allocation of "size" bytes, which returned "ptr". + // __asan_free_hook(ptr) is called immediately before + // deallocation of "ptr". 
+ void __asan_malloc_hook(void *ptr, size_t size); + void __asan_free_hook(void *ptr); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // SANITIZER_ASAN_INTERFACE_H diff --git a/python/clang/5.1/include/sanitizer/common_interface_defs.h b/python/clang/5.1/include/sanitizer/common_interface_defs.h new file mode 100644 index 00000000..741958f8 --- /dev/null +++ b/python/clang/5.1/include/sanitizer/common_interface_defs.h @@ -0,0 +1,54 @@ +//===-- sanitizer/common_interface_defs.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common part of the public sanitizer interface. +//===----------------------------------------------------------------------===// + +#ifndef SANITIZER_COMMON_INTERFACE_DEFS_H +#define SANITIZER_COMMON_INTERFACE_DEFS_H + +#include +#include + +// GCC does not understand __has_feature. +#if !defined(__has_feature) +# define __has_feature(x) 0 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + // Tell the tools to write their reports to "path." instead of stderr. + void __sanitizer_set_report_path(const char *path); + + // Notify the tools that the sandbox is going to be turned on. The reserved + // parameter will be used in the future to hold a structure with functions + // that the tools may call to bypass the sandbox. + void __sanitizer_sandbox_on_notify(void *reserved); + + // This function is called by the tool when it has just finished reporting + // an error. 'error_summary' is a one-line string that summarizes + // the error message. This function can be overridden by the client. + void __sanitizer_report_error_summary(const char *error_summary); + + // Some of the sanitizers (e.g. asan/tsan) may miss bugs that happen + // in unaligned loads/stores. 
In order to find such bugs reliably one needs + // to replace plain unaligned loads/stores with these calls. + uint16_t __sanitizer_unaligned_load16(const void *p); + uint32_t __sanitizer_unaligned_load32(const void *p); + uint64_t __sanitizer_unaligned_load64(const void *p); + void __sanitizer_unaligned_store16(void *p, uint16_t x); + void __sanitizer_unaligned_store32(void *p, uint32_t x); + void __sanitizer_unaligned_store64(void *p, uint64_t x); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // SANITIZER_COMMON_INTERFACE_DEFS_H diff --git a/python/clang/5.1/include/sanitizer/dfsan_interface.h b/python/clang/5.1/include/sanitizer/dfsan_interface.h new file mode 100644 index 00000000..f14d45a2 --- /dev/null +++ b/python/clang/5.1/include/sanitizer/dfsan_interface.h @@ -0,0 +1,87 @@ +//===-- dfsan_interface.h -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of DataFlowSanitizer. +// +// Public interface header. +//===----------------------------------------------------------------------===// +#ifndef DFSAN_INTERFACE_H +#define DFSAN_INTERFACE_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef uint16_t dfsan_label; + +/// Stores information associated with a specific label identifier. A label +/// may be a base label created using dfsan_create_label, with associated +/// text description and user data, or an automatically created union label, +/// which represents the union of two label identifiers (which may themselves +/// be base or union labels). +struct dfsan_label_info { + // Fields for union labels, set to 0 for base labels. + dfsan_label l1; + dfsan_label l2; + + // Fields for base labels. 
+ const char *desc; + void *userdata; +}; + +/// Computes the union of \c l1 and \c l2, possibly creating a union label in +/// the process. +dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2); + +/// Creates and returns a base label with the given description and user data. +dfsan_label dfsan_create_label(const char *desc, void *userdata); + +/// Sets the label for each address in [addr,addr+size) to \c label. +void dfsan_set_label(dfsan_label label, void *addr, size_t size); + +/// Sets the label for each address in [addr,addr+size) to the union of the +/// current label for that address and \c label. +void dfsan_add_label(dfsan_label label, void *addr, size_t size); + +/// Retrieves the label associated with the given data. +/// +/// The type of 'data' is arbitrary. The function accepts a value of any type, +/// which can be truncated or extended (implicitly or explicitly) as necessary. +/// The truncation/extension operations will preserve the label of the original +/// value. +dfsan_label dfsan_get_label(long data); + +/// Retrieves the label associated with the data at the given address. +dfsan_label dfsan_read_label(const void *addr, size_t size); + +/// Retrieves a pointer to the dfsan_label_info struct for the given label. +const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label); + +/// Returns whether the given label label contains the label elem. +int dfsan_has_label(dfsan_label label, dfsan_label elem); + +/// If the given label label contains a label with the description desc, returns +/// that label, else returns 0. 
+dfsan_label dfsan_has_label_with_desc(dfsan_label label, const char *desc); + +#ifdef __cplusplus +} // extern "C" + +template +void dfsan_set_label(dfsan_label label, T &data) { // NOLINT + dfsan_set_label(label, (void *)&data, sizeof(T)); +} + +#endif + +#endif // DFSAN_INTERFACE_H diff --git a/python/clang/5.1/include/sanitizer/linux_syscall_hooks.h b/python/clang/5.1/include/sanitizer/linux_syscall_hooks.h new file mode 100644 index 00000000..89867c15 --- /dev/null +++ b/python/clang/5.1/include/sanitizer/linux_syscall_hooks.h @@ -0,0 +1,3070 @@ +//===-- linux_syscall_hooks.h ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of public sanitizer interface. +// +// System call handlers. +// +// Interface methods declared in this header implement pre- and post- syscall +// actions for the active sanitizer. 
+// Usage: +// __sanitizer_syscall_pre_getfoo(...args...); +// long res = syscall(__NR_getfoo, ...args...); +// __sanitizer_syscall_post_getfoo(res, ...args...); +//===----------------------------------------------------------------------===// +#ifndef SANITIZER_LINUX_SYSCALL_HOOKS_H +#define SANITIZER_LINUX_SYSCALL_HOOKS_H + +#define __sanitizer_syscall_pre_time(tloc) \ + __sanitizer_syscall_pre_impl_time((long)(tloc)) +#define __sanitizer_syscall_post_time(res, tloc) \ + __sanitizer_syscall_post_impl_time(res, (long)(tloc)) +#define __sanitizer_syscall_pre_stime(tptr) \ + __sanitizer_syscall_pre_impl_stime((long)(tptr)) +#define __sanitizer_syscall_post_stime(res, tptr) \ + __sanitizer_syscall_post_impl_stime(res, (long)(tptr)) +#define __sanitizer_syscall_pre_gettimeofday(tv, tz) \ + __sanitizer_syscall_pre_impl_gettimeofday((long)(tv), (long)(tz)) +#define __sanitizer_syscall_post_gettimeofday(res, tv, tz) \ + __sanitizer_syscall_post_impl_gettimeofday(res, (long)(tv), (long)(tz)) +#define __sanitizer_syscall_pre_settimeofday(tv, tz) \ + __sanitizer_syscall_pre_impl_settimeofday((long)(tv), (long)(tz)) +#define __sanitizer_syscall_post_settimeofday(res, tv, tz) \ + __sanitizer_syscall_post_impl_settimeofday(res, (long)(tv), (long)(tz)) +#define __sanitizer_syscall_pre_adjtimex(txc_p) \ + __sanitizer_syscall_pre_impl_adjtimex((long)(txc_p)) +#define __sanitizer_syscall_post_adjtimex(res, txc_p) \ + __sanitizer_syscall_post_impl_adjtimex(res, (long)(txc_p)) +#define __sanitizer_syscall_pre_times(tbuf) \ + __sanitizer_syscall_pre_impl_times((long)(tbuf)) +#define __sanitizer_syscall_post_times(res, tbuf) \ + __sanitizer_syscall_post_impl_times(res, (long)(tbuf)) +#define __sanitizer_syscall_pre_gettid() __sanitizer_syscall_pre_impl_gettid() +#define __sanitizer_syscall_post_gettid(res) \ + __sanitizer_syscall_post_impl_gettid(res) +#define __sanitizer_syscall_pre_nanosleep(rqtp, rmtp) \ + __sanitizer_syscall_pre_impl_nanosleep((long)(rqtp), (long)(rmtp)) +#define 
__sanitizer_syscall_post_nanosleep(res, rqtp, rmtp) \ + __sanitizer_syscall_post_impl_nanosleep(res, (long)(rqtp), (long)(rmtp)) +#define __sanitizer_syscall_pre_alarm(seconds) \ + __sanitizer_syscall_pre_impl_alarm((long)(seconds)) +#define __sanitizer_syscall_post_alarm(res, seconds) \ + __sanitizer_syscall_post_impl_alarm(res, (long)(seconds)) +#define __sanitizer_syscall_pre_getpid() __sanitizer_syscall_pre_impl_getpid() +#define __sanitizer_syscall_post_getpid(res) \ + __sanitizer_syscall_post_impl_getpid(res) +#define __sanitizer_syscall_pre_getppid() __sanitizer_syscall_pre_impl_getppid() +#define __sanitizer_syscall_post_getppid(res) \ + __sanitizer_syscall_post_impl_getppid(res) +#define __sanitizer_syscall_pre_getuid() __sanitizer_syscall_pre_impl_getuid() +#define __sanitizer_syscall_post_getuid(res) \ + __sanitizer_syscall_post_impl_getuid(res) +#define __sanitizer_syscall_pre_geteuid() __sanitizer_syscall_pre_impl_geteuid() +#define __sanitizer_syscall_post_geteuid(res) \ + __sanitizer_syscall_post_impl_geteuid(res) +#define __sanitizer_syscall_pre_getgid() __sanitizer_syscall_pre_impl_getgid() +#define __sanitizer_syscall_post_getgid(res) \ + __sanitizer_syscall_post_impl_getgid(res) +#define __sanitizer_syscall_pre_getegid() __sanitizer_syscall_pre_impl_getegid() +#define __sanitizer_syscall_post_getegid(res) \ + __sanitizer_syscall_post_impl_getegid(res) +#define __sanitizer_syscall_pre_getresuid(ruid, euid, suid) \ + __sanitizer_syscall_pre_impl_getresuid((long)(ruid), (long)(euid), \ + (long)(suid)) +#define __sanitizer_syscall_post_getresuid(res, ruid, euid, suid) \ + __sanitizer_syscall_post_impl_getresuid(res, (long)(ruid), (long)(euid), \ + (long)(suid)) +#define __sanitizer_syscall_pre_getresgid(rgid, egid, sgid) \ + __sanitizer_syscall_pre_impl_getresgid((long)(rgid), (long)(egid), \ + (long)(sgid)) +#define __sanitizer_syscall_post_getresgid(res, rgid, egid, sgid) \ + __sanitizer_syscall_post_impl_getresgid(res, (long)(rgid), (long)(egid), 
\ + (long)(sgid)) +#define __sanitizer_syscall_pre_getpgid(pid) \ + __sanitizer_syscall_pre_impl_getpgid((long)(pid)) +#define __sanitizer_syscall_post_getpgid(res, pid) \ + __sanitizer_syscall_post_impl_getpgid(res, (long)(pid)) +#define __sanitizer_syscall_pre_getpgrp() __sanitizer_syscall_pre_impl_getpgrp() +#define __sanitizer_syscall_post_getpgrp(res) \ + __sanitizer_syscall_post_impl_getpgrp(res) +#define __sanitizer_syscall_pre_getsid(pid) \ + __sanitizer_syscall_pre_impl_getsid((long)(pid)) +#define __sanitizer_syscall_post_getsid(res, pid) \ + __sanitizer_syscall_post_impl_getsid(res, (long)(pid)) +#define __sanitizer_syscall_pre_getgroups(gidsetsize, grouplist) \ + __sanitizer_syscall_pre_impl_getgroups((long)(gidsetsize), (long)(grouplist)) +#define __sanitizer_syscall_post_getgroups(res, gidsetsize, grouplist) \ + __sanitizer_syscall_post_impl_getgroups(res, (long)(gidsetsize), \ + (long)(grouplist)) +#define __sanitizer_syscall_pre_setregid(rgid, egid) \ + __sanitizer_syscall_pre_impl_setregid((long)(rgid), (long)(egid)) +#define __sanitizer_syscall_post_setregid(res, rgid, egid) \ + __sanitizer_syscall_post_impl_setregid(res, (long)(rgid), (long)(egid)) +#define __sanitizer_syscall_pre_setgid(gid) \ + __sanitizer_syscall_pre_impl_setgid((long)(gid)) +#define __sanitizer_syscall_post_setgid(res, gid) \ + __sanitizer_syscall_post_impl_setgid(res, (long)(gid)) +#define __sanitizer_syscall_pre_setreuid(ruid, euid) \ + __sanitizer_syscall_pre_impl_setreuid((long)(ruid), (long)(euid)) +#define __sanitizer_syscall_post_setreuid(res, ruid, euid) \ + __sanitizer_syscall_post_impl_setreuid(res, (long)(ruid), (long)(euid)) +#define __sanitizer_syscall_pre_setuid(uid) \ + __sanitizer_syscall_pre_impl_setuid((long)(uid)) +#define __sanitizer_syscall_post_setuid(res, uid) \ + __sanitizer_syscall_post_impl_setuid(res, (long)(uid)) +#define __sanitizer_syscall_pre_setresuid(ruid, euid, suid) \ + __sanitizer_syscall_pre_impl_setresuid((long)(ruid), (long)(euid), \ + 
(long)(suid)) +#define __sanitizer_syscall_post_setresuid(res, ruid, euid, suid) \ + __sanitizer_syscall_post_impl_setresuid(res, (long)(ruid), (long)(euid), \ + (long)(suid)) +#define __sanitizer_syscall_pre_setresgid(rgid, egid, sgid) \ + __sanitizer_syscall_pre_impl_setresgid((long)(rgid), (long)(egid), \ + (long)(sgid)) +#define __sanitizer_syscall_post_setresgid(res, rgid, egid, sgid) \ + __sanitizer_syscall_post_impl_setresgid(res, (long)(rgid), (long)(egid), \ + (long)(sgid)) +#define __sanitizer_syscall_pre_setfsuid(uid) \ + __sanitizer_syscall_pre_impl_setfsuid((long)(uid)) +#define __sanitizer_syscall_post_setfsuid(res, uid) \ + __sanitizer_syscall_post_impl_setfsuid(res, (long)(uid)) +#define __sanitizer_syscall_pre_setfsgid(gid) \ + __sanitizer_syscall_pre_impl_setfsgid((long)(gid)) +#define __sanitizer_syscall_post_setfsgid(res, gid) \ + __sanitizer_syscall_post_impl_setfsgid(res, (long)(gid)) +#define __sanitizer_syscall_pre_setpgid(pid, pgid) \ + __sanitizer_syscall_pre_impl_setpgid((long)(pid), (long)(pgid)) +#define __sanitizer_syscall_post_setpgid(res, pid, pgid) \ + __sanitizer_syscall_post_impl_setpgid(res, (long)(pid), (long)(pgid)) +#define __sanitizer_syscall_pre_setsid() __sanitizer_syscall_pre_impl_setsid() +#define __sanitizer_syscall_post_setsid(res) \ + __sanitizer_syscall_post_impl_setsid(res) +#define __sanitizer_syscall_pre_setgroups(gidsetsize, grouplist) \ + __sanitizer_syscall_pre_impl_setgroups((long)(gidsetsize), (long)(grouplist)) +#define __sanitizer_syscall_post_setgroups(res, gidsetsize, grouplist) \ + __sanitizer_syscall_post_impl_setgroups(res, (long)(gidsetsize), \ + (long)(grouplist)) +#define __sanitizer_syscall_pre_acct(name) \ + __sanitizer_syscall_pre_impl_acct((long)(name)) +#define __sanitizer_syscall_post_acct(res, name) \ + __sanitizer_syscall_post_impl_acct(res, (long)(name)) +#define __sanitizer_syscall_pre_capget(header, dataptr) \ + __sanitizer_syscall_pre_impl_capget((long)(header), (long)(dataptr)) +#define 
__sanitizer_syscall_post_capget(res, header, dataptr) \ + __sanitizer_syscall_post_impl_capget(res, (long)(header), (long)(dataptr)) +#define __sanitizer_syscall_pre_capset(header, data) \ + __sanitizer_syscall_pre_impl_capset((long)(header), (long)(data)) +#define __sanitizer_syscall_post_capset(res, header, data) \ + __sanitizer_syscall_post_impl_capset(res, (long)(header), (long)(data)) +#define __sanitizer_syscall_pre_personality(personality) \ + __sanitizer_syscall_pre_impl_personality((long)(personality)) +#define __sanitizer_syscall_post_personality(res, personality) \ + __sanitizer_syscall_post_impl_personality(res, (long)(personality)) +#define __sanitizer_syscall_pre_sigpending(set) \ + __sanitizer_syscall_pre_impl_sigpending((long)(set)) +#define __sanitizer_syscall_post_sigpending(res, set) \ + __sanitizer_syscall_post_impl_sigpending(res, (long)(set)) +#define __sanitizer_syscall_pre_sigprocmask(how, set, oset) \ + __sanitizer_syscall_pre_impl_sigprocmask((long)(how), (long)(set), \ + (long)(oset)) +#define __sanitizer_syscall_post_sigprocmask(res, how, set, oset) \ + __sanitizer_syscall_post_impl_sigprocmask(res, (long)(how), (long)(set), \ + (long)(oset)) +#define __sanitizer_syscall_pre_getitimer(which, value) \ + __sanitizer_syscall_pre_impl_getitimer((long)(which), (long)(value)) +#define __sanitizer_syscall_post_getitimer(res, which, value) \ + __sanitizer_syscall_post_impl_getitimer(res, (long)(which), (long)(value)) +#define __sanitizer_syscall_pre_setitimer(which, value, ovalue) \ + __sanitizer_syscall_pre_impl_setitimer((long)(which), (long)(value), \ + (long)(ovalue)) +#define __sanitizer_syscall_post_setitimer(res, which, value, ovalue) \ + __sanitizer_syscall_post_impl_setitimer(res, (long)(which), (long)(value), \ + (long)(ovalue)) +#define __sanitizer_syscall_pre_timer_create(which_clock, timer_event_spec, \ + created_timer_id) \ + __sanitizer_syscall_pre_impl_timer_create( \ + (long)(which_clock), (long)(timer_event_spec), 
(long)(created_timer_id)) +#define __sanitizer_syscall_post_timer_create( \ + res, which_clock, timer_event_spec, created_timer_id) \ + __sanitizer_syscall_post_impl_timer_create(res, (long)(which_clock), \ + (long)(timer_event_spec), \ + (long)(created_timer_id)) +#define __sanitizer_syscall_pre_timer_gettime(timer_id, setting) \ + __sanitizer_syscall_pre_impl_timer_gettime((long)(timer_id), (long)(setting)) +#define __sanitizer_syscall_post_timer_gettime(res, timer_id, setting) \ + __sanitizer_syscall_post_impl_timer_gettime(res, (long)(timer_id), \ + (long)(setting)) +#define __sanitizer_syscall_pre_timer_getoverrun(timer_id) \ + __sanitizer_syscall_pre_impl_timer_getoverrun((long)(timer_id)) +#define __sanitizer_syscall_post_timer_getoverrun(res, timer_id) \ + __sanitizer_syscall_post_impl_timer_getoverrun(res, (long)(timer_id)) +#define __sanitizer_syscall_pre_timer_settime(timer_id, flags, new_setting, \ + old_setting) \ + __sanitizer_syscall_pre_impl_timer_settime((long)(timer_id), (long)(flags), \ + (long)(new_setting), \ + (long)(old_setting)) +#define __sanitizer_syscall_post_timer_settime(res, timer_id, flags, \ + new_setting, old_setting) \ + __sanitizer_syscall_post_impl_timer_settime( \ + res, (long)(timer_id), (long)(flags), (long)(new_setting), \ + (long)(old_setting)) +#define __sanitizer_syscall_pre_timer_delete(timer_id) \ + __sanitizer_syscall_pre_impl_timer_delete((long)(timer_id)) +#define __sanitizer_syscall_post_timer_delete(res, timer_id) \ + __sanitizer_syscall_post_impl_timer_delete(res, (long)(timer_id)) +#define __sanitizer_syscall_pre_clock_settime(which_clock, tp) \ + __sanitizer_syscall_pre_impl_clock_settime((long)(which_clock), (long)(tp)) +#define __sanitizer_syscall_post_clock_settime(res, which_clock, tp) \ + __sanitizer_syscall_post_impl_clock_settime(res, (long)(which_clock), \ + (long)(tp)) +#define __sanitizer_syscall_pre_clock_gettime(which_clock, tp) \ + __sanitizer_syscall_pre_impl_clock_gettime((long)(which_clock), 
(long)(tp)) +#define __sanitizer_syscall_post_clock_gettime(res, which_clock, tp) \ + __sanitizer_syscall_post_impl_clock_gettime(res, (long)(which_clock), \ + (long)(tp)) +#define __sanitizer_syscall_pre_clock_adjtime(which_clock, tx) \ + __sanitizer_syscall_pre_impl_clock_adjtime((long)(which_clock), (long)(tx)) +#define __sanitizer_syscall_post_clock_adjtime(res, which_clock, tx) \ + __sanitizer_syscall_post_impl_clock_adjtime(res, (long)(which_clock), \ + (long)(tx)) +#define __sanitizer_syscall_pre_clock_getres(which_clock, tp) \ + __sanitizer_syscall_pre_impl_clock_getres((long)(which_clock), (long)(tp)) +#define __sanitizer_syscall_post_clock_getres(res, which_clock, tp) \ + __sanitizer_syscall_post_impl_clock_getres(res, (long)(which_clock), \ + (long)(tp)) +#define __sanitizer_syscall_pre_clock_nanosleep(which_clock, flags, rqtp, \ + rmtp) \ + __sanitizer_syscall_pre_impl_clock_nanosleep( \ + (long)(which_clock), (long)(flags), (long)(rqtp), (long)(rmtp)) +#define __sanitizer_syscall_post_clock_nanosleep(res, which_clock, flags, \ + rqtp, rmtp) \ + __sanitizer_syscall_post_impl_clock_nanosleep( \ + res, (long)(which_clock), (long)(flags), (long)(rqtp), (long)(rmtp)) +#define __sanitizer_syscall_pre_nice(increment) \ + __sanitizer_syscall_pre_impl_nice((long)(increment)) +#define __sanitizer_syscall_post_nice(res, increment) \ + __sanitizer_syscall_post_impl_nice(res, (long)(increment)) +#define __sanitizer_syscall_pre_sched_setscheduler(pid, policy, param) \ + __sanitizer_syscall_pre_impl_sched_setscheduler((long)(pid), (long)(policy), \ + (long)(param)) +#define __sanitizer_syscall_post_sched_setscheduler(res, pid, policy, param) \ + __sanitizer_syscall_post_impl_sched_setscheduler( \ + res, (long)(pid), (long)(policy), (long)(param)) +#define __sanitizer_syscall_pre_sched_setparam(pid, param) \ + __sanitizer_syscall_pre_impl_sched_setparam((long)(pid), (long)(param)) +#define __sanitizer_syscall_post_sched_setparam(res, pid, param) \ + 
__sanitizer_syscall_post_impl_sched_setparam(res, (long)(pid), (long)(param)) +#define __sanitizer_syscall_pre_sched_getscheduler(pid) \ + __sanitizer_syscall_pre_impl_sched_getscheduler((long)(pid)) +#define __sanitizer_syscall_post_sched_getscheduler(res, pid) \ + __sanitizer_syscall_post_impl_sched_getscheduler(res, (long)(pid)) +#define __sanitizer_syscall_pre_sched_getparam(pid, param) \ + __sanitizer_syscall_pre_impl_sched_getparam((long)(pid), (long)(param)) +#define __sanitizer_syscall_post_sched_getparam(res, pid, param) \ + __sanitizer_syscall_post_impl_sched_getparam(res, (long)(pid), (long)(param)) +#define __sanitizer_syscall_pre_sched_setaffinity(pid, len, user_mask_ptr) \ + __sanitizer_syscall_pre_impl_sched_setaffinity((long)(pid), (long)(len), \ + (long)(user_mask_ptr)) +#define __sanitizer_syscall_post_sched_setaffinity(res, pid, len, \ + user_mask_ptr) \ + __sanitizer_syscall_post_impl_sched_setaffinity( \ + res, (long)(pid), (long)(len), (long)(user_mask_ptr)) +#define __sanitizer_syscall_pre_sched_getaffinity(pid, len, user_mask_ptr) \ + __sanitizer_syscall_pre_impl_sched_getaffinity((long)(pid), (long)(len), \ + (long)(user_mask_ptr)) +#define __sanitizer_syscall_post_sched_getaffinity(res, pid, len, \ + user_mask_ptr) \ + __sanitizer_syscall_post_impl_sched_getaffinity( \ + res, (long)(pid), (long)(len), (long)(user_mask_ptr)) +#define __sanitizer_syscall_pre_sched_yield() \ + __sanitizer_syscall_pre_impl_sched_yield() +#define __sanitizer_syscall_post_sched_yield(res) \ + __sanitizer_syscall_post_impl_sched_yield(res) +#define __sanitizer_syscall_pre_sched_get_priority_max(policy) \ + __sanitizer_syscall_pre_impl_sched_get_priority_max((long)(policy)) +#define __sanitizer_syscall_post_sched_get_priority_max(res, policy) \ + __sanitizer_syscall_post_impl_sched_get_priority_max(res, (long)(policy)) +#define __sanitizer_syscall_pre_sched_get_priority_min(policy) \ + __sanitizer_syscall_pre_impl_sched_get_priority_min((long)(policy)) +#define 
__sanitizer_syscall_post_sched_get_priority_min(res, policy) \ + __sanitizer_syscall_post_impl_sched_get_priority_min(res, (long)(policy)) +#define __sanitizer_syscall_pre_sched_rr_get_interval(pid, interval) \ + __sanitizer_syscall_pre_impl_sched_rr_get_interval((long)(pid), \ + (long)(interval)) +#define __sanitizer_syscall_post_sched_rr_get_interval(res, pid, interval) \ + __sanitizer_syscall_post_impl_sched_rr_get_interval(res, (long)(pid), \ + (long)(interval)) +#define __sanitizer_syscall_pre_setpriority(which, who, niceval) \ + __sanitizer_syscall_pre_impl_setpriority((long)(which), (long)(who), \ + (long)(niceval)) +#define __sanitizer_syscall_post_setpriority(res, which, who, niceval) \ + __sanitizer_syscall_post_impl_setpriority(res, (long)(which), (long)(who), \ + (long)(niceval)) +#define __sanitizer_syscall_pre_getpriority(which, who) \ + __sanitizer_syscall_pre_impl_getpriority((long)(which), (long)(who)) +#define __sanitizer_syscall_post_getpriority(res, which, who) \ + __sanitizer_syscall_post_impl_getpriority(res, (long)(which), (long)(who)) +#define __sanitizer_syscall_pre_shutdown(arg0, arg1) \ + __sanitizer_syscall_pre_impl_shutdown((long)(arg0), (long)(arg1)) +#define __sanitizer_syscall_post_shutdown(res, arg0, arg1) \ + __sanitizer_syscall_post_impl_shutdown(res, (long)(arg0), (long)(arg1)) +#define __sanitizer_syscall_pre_reboot(magic1, magic2, cmd, arg) \ + __sanitizer_syscall_pre_impl_reboot((long)(magic1), (long)(magic2), \ + (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_post_reboot(res, magic1, magic2, cmd, arg) \ + __sanitizer_syscall_post_impl_reboot(res, (long)(magic1), (long)(magic2), \ + (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_pre_restart_syscall() \ + __sanitizer_syscall_pre_impl_restart_syscall() +#define __sanitizer_syscall_post_restart_syscall(res) \ + __sanitizer_syscall_post_impl_restart_syscall(res) +#define __sanitizer_syscall_pre_kexec_load(entry, nr_segments, segments, \ + flags) \ + 
__sanitizer_syscall_pre_impl_kexec_load((long)(entry), (long)(nr_segments), \ + (long)(segments), (long)(flags)) +#define __sanitizer_syscall_post_kexec_load(res, entry, nr_segments, segments, \ + flags) \ + __sanitizer_syscall_post_impl_kexec_load(res, (long)(entry), \ + (long)(nr_segments), \ + (long)(segments), (long)(flags)) +#define __sanitizer_syscall_pre_exit(error_code) \ + __sanitizer_syscall_pre_impl_exit((long)(error_code)) +#define __sanitizer_syscall_post_exit(res, error_code) \ + __sanitizer_syscall_post_impl_exit(res, (long)(error_code)) +#define __sanitizer_syscall_pre_exit_group(error_code) \ + __sanitizer_syscall_pre_impl_exit_group((long)(error_code)) +#define __sanitizer_syscall_post_exit_group(res, error_code) \ + __sanitizer_syscall_post_impl_exit_group(res, (long)(error_code)) +#define __sanitizer_syscall_pre_wait4(pid, stat_addr, options, ru) \ + __sanitizer_syscall_pre_impl_wait4((long)(pid), (long)(stat_addr), \ + (long)(options), (long)(ru)) +#define __sanitizer_syscall_post_wait4(res, pid, stat_addr, options, ru) \ + __sanitizer_syscall_post_impl_wait4(res, (long)(pid), (long)(stat_addr), \ + (long)(options), (long)(ru)) +#define __sanitizer_syscall_pre_waitid(which, pid, infop, options, ru) \ + __sanitizer_syscall_pre_impl_waitid( \ + (long)(which), (long)(pid), (long)(infop), (long)(options), (long)(ru)) +#define __sanitizer_syscall_post_waitid(res, which, pid, infop, options, ru) \ + __sanitizer_syscall_post_impl_waitid(res, (long)(which), (long)(pid), \ + (long)(infop), (long)(options), \ + (long)(ru)) +#define __sanitizer_syscall_pre_waitpid(pid, stat_addr, options) \ + __sanitizer_syscall_pre_impl_waitpid((long)(pid), (long)(stat_addr), \ + (long)(options)) +#define __sanitizer_syscall_post_waitpid(res, pid, stat_addr, options) \ + __sanitizer_syscall_post_impl_waitpid(res, (long)(pid), (long)(stat_addr), \ + (long)(options)) +#define __sanitizer_syscall_pre_set_tid_address(tidptr) \ + 
__sanitizer_syscall_pre_impl_set_tid_address((long)(tidptr)) +#define __sanitizer_syscall_post_set_tid_address(res, tidptr) \ + __sanitizer_syscall_post_impl_set_tid_address(res, (long)(tidptr)) +#define __sanitizer_syscall_pre_init_module(umod, len, uargs) \ + __sanitizer_syscall_pre_impl_init_module((long)(umod), (long)(len), \ + (long)(uargs)) +#define __sanitizer_syscall_post_init_module(res, umod, len, uargs) \ + __sanitizer_syscall_post_impl_init_module(res, (long)(umod), (long)(len), \ + (long)(uargs)) +#define __sanitizer_syscall_pre_delete_module(name_user, flags) \ + __sanitizer_syscall_pre_impl_delete_module((long)(name_user), (long)(flags)) +#define __sanitizer_syscall_post_delete_module(res, name_user, flags) \ + __sanitizer_syscall_post_impl_delete_module(res, (long)(name_user), \ + (long)(flags)) +#define __sanitizer_syscall_pre_rt_sigprocmask(how, set, oset, sigsetsize) \ + __sanitizer_syscall_pre_impl_rt_sigprocmask( \ + (long)(how), (long)(set), (long)(oset), (long)(sigsetsize)) +#define __sanitizer_syscall_post_rt_sigprocmask(res, how, set, oset, \ + sigsetsize) \ + __sanitizer_syscall_post_impl_rt_sigprocmask( \ + res, (long)(how), (long)(set), (long)(oset), (long)(sigsetsize)) +#define __sanitizer_syscall_pre_rt_sigpending(set, sigsetsize) \ + __sanitizer_syscall_pre_impl_rt_sigpending((long)(set), (long)(sigsetsize)) +#define __sanitizer_syscall_post_rt_sigpending(res, set, sigsetsize) \ + __sanitizer_syscall_post_impl_rt_sigpending(res, (long)(set), \ + (long)(sigsetsize)) +#define __sanitizer_syscall_pre_rt_sigtimedwait(uthese, uinfo, uts, \ + sigsetsize) \ + __sanitizer_syscall_pre_impl_rt_sigtimedwait( \ + (long)(uthese), (long)(uinfo), (long)(uts), (long)(sigsetsize)) +#define __sanitizer_syscall_post_rt_sigtimedwait(res, uthese, uinfo, uts, \ + sigsetsize) \ + __sanitizer_syscall_post_impl_rt_sigtimedwait( \ + res, (long)(uthese), (long)(uinfo), (long)(uts), (long)(sigsetsize)) +#define __sanitizer_syscall_pre_rt_tgsigqueueinfo(tgid, 
pid, sig, uinfo) \ + __sanitizer_syscall_pre_impl_rt_tgsigqueueinfo((long)(tgid), (long)(pid), \ + (long)(sig), (long)(uinfo)) +#define __sanitizer_syscall_post_rt_tgsigqueueinfo(res, tgid, pid, sig, uinfo) \ + __sanitizer_syscall_post_impl_rt_tgsigqueueinfo( \ + res, (long)(tgid), (long)(pid), (long)(sig), (long)(uinfo)) +#define __sanitizer_syscall_pre_kill(pid, sig) \ + __sanitizer_syscall_pre_impl_kill((long)(pid), (long)(sig)) +#define __sanitizer_syscall_post_kill(res, pid, sig) \ + __sanitizer_syscall_post_impl_kill(res, (long)(pid), (long)(sig)) +#define __sanitizer_syscall_pre_tgkill(tgid, pid, sig) \ + __sanitizer_syscall_pre_impl_tgkill((long)(tgid), (long)(pid), (long)(sig)) +#define __sanitizer_syscall_post_tgkill(res, tgid, pid, sig) \ + __sanitizer_syscall_post_impl_tgkill(res, (long)(tgid), (long)(pid), \ + (long)(sig)) +#define __sanitizer_syscall_pre_tkill(pid, sig) \ + __sanitizer_syscall_pre_impl_tkill((long)(pid), (long)(sig)) +#define __sanitizer_syscall_post_tkill(res, pid, sig) \ + __sanitizer_syscall_post_impl_tkill(res, (long)(pid), (long)(sig)) +#define __sanitizer_syscall_pre_rt_sigqueueinfo(pid, sig, uinfo) \ + __sanitizer_syscall_pre_impl_rt_sigqueueinfo((long)(pid), (long)(sig), \ + (long)(uinfo)) +#define __sanitizer_syscall_post_rt_sigqueueinfo(res, pid, sig, uinfo) \ + __sanitizer_syscall_post_impl_rt_sigqueueinfo(res, (long)(pid), (long)(sig), \ + (long)(uinfo)) +#define __sanitizer_syscall_pre_sgetmask() \ + __sanitizer_syscall_pre_impl_sgetmask() +#define __sanitizer_syscall_post_sgetmask(res) \ + __sanitizer_syscall_post_impl_sgetmask(res) +#define __sanitizer_syscall_pre_ssetmask(newmask) \ + __sanitizer_syscall_pre_impl_ssetmask((long)(newmask)) +#define __sanitizer_syscall_post_ssetmask(res, newmask) \ + __sanitizer_syscall_post_impl_ssetmask(res, (long)(newmask)) +#define __sanitizer_syscall_pre_signal(sig, handler) \ + __sanitizer_syscall_pre_impl_signal((long)(sig), (long)(handler)) +#define 
__sanitizer_syscall_post_signal(res, sig, handler) \ + __sanitizer_syscall_post_impl_signal(res, (long)(sig), (long)(handler)) +#define __sanitizer_syscall_pre_pause() __sanitizer_syscall_pre_impl_pause() +#define __sanitizer_syscall_post_pause(res) \ + __sanitizer_syscall_post_impl_pause(res) +#define __sanitizer_syscall_pre_sync() __sanitizer_syscall_pre_impl_sync() +#define __sanitizer_syscall_post_sync(res) \ + __sanitizer_syscall_post_impl_sync(res) +#define __sanitizer_syscall_pre_fsync(fd) \ + __sanitizer_syscall_pre_impl_fsync((long)(fd)) +#define __sanitizer_syscall_post_fsync(res, fd) \ + __sanitizer_syscall_post_impl_fsync(res, (long)(fd)) +#define __sanitizer_syscall_pre_fdatasync(fd) \ + __sanitizer_syscall_pre_impl_fdatasync((long)(fd)) +#define __sanitizer_syscall_post_fdatasync(res, fd) \ + __sanitizer_syscall_post_impl_fdatasync(res, (long)(fd)) +#define __sanitizer_syscall_pre_bdflush(func, data) \ + __sanitizer_syscall_pre_impl_bdflush((long)(func), (long)(data)) +#define __sanitizer_syscall_post_bdflush(res, func, data) \ + __sanitizer_syscall_post_impl_bdflush(res, (long)(func), (long)(data)) +#define __sanitizer_syscall_pre_mount(dev_name, dir_name, type, flags, data) \ + __sanitizer_syscall_pre_impl_mount((long)(dev_name), (long)(dir_name), \ + (long)(type), (long)(flags), \ + (long)(data)) +#define __sanitizer_syscall_post_mount(res, dev_name, dir_name, type, flags, \ + data) \ + __sanitizer_syscall_post_impl_mount(res, (long)(dev_name), (long)(dir_name), \ + (long)(type), (long)(flags), \ + (long)(data)) +#define __sanitizer_syscall_pre_umount(name, flags) \ + __sanitizer_syscall_pre_impl_umount((long)(name), (long)(flags)) +#define __sanitizer_syscall_post_umount(res, name, flags) \ + __sanitizer_syscall_post_impl_umount(res, (long)(name), (long)(flags)) +#define __sanitizer_syscall_pre_oldumount(name) \ + __sanitizer_syscall_pre_impl_oldumount((long)(name)) +#define __sanitizer_syscall_post_oldumount(res, name) \ + 
__sanitizer_syscall_post_impl_oldumount(res, (long)(name)) +#define __sanitizer_syscall_pre_truncate(path, length) \ + __sanitizer_syscall_pre_impl_truncate((long)(path), (long)(length)) +#define __sanitizer_syscall_post_truncate(res, path, length) \ + __sanitizer_syscall_post_impl_truncate(res, (long)(path), (long)(length)) +#define __sanitizer_syscall_pre_ftruncate(fd, length) \ + __sanitizer_syscall_pre_impl_ftruncate((long)(fd), (long)(length)) +#define __sanitizer_syscall_post_ftruncate(res, fd, length) \ + __sanitizer_syscall_post_impl_ftruncate(res, (long)(fd), (long)(length)) +#define __sanitizer_syscall_pre_stat(filename, statbuf) \ + __sanitizer_syscall_pre_impl_stat((long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_post_stat(res, filename, statbuf) \ + __sanitizer_syscall_post_impl_stat(res, (long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_pre_statfs(path, buf) \ + __sanitizer_syscall_pre_impl_statfs((long)(path), (long)(buf)) +#define __sanitizer_syscall_post_statfs(res, path, buf) \ + __sanitizer_syscall_post_impl_statfs(res, (long)(path), (long)(buf)) +#define __sanitizer_syscall_pre_statfs64(path, sz, buf) \ + __sanitizer_syscall_pre_impl_statfs64((long)(path), (long)(sz), (long)(buf)) +#define __sanitizer_syscall_post_statfs64(res, path, sz, buf) \ + __sanitizer_syscall_post_impl_statfs64(res, (long)(path), (long)(sz), \ + (long)(buf)) +#define __sanitizer_syscall_pre_fstatfs(fd, buf) \ + __sanitizer_syscall_pre_impl_fstatfs((long)(fd), (long)(buf)) +#define __sanitizer_syscall_post_fstatfs(res, fd, buf) \ + __sanitizer_syscall_post_impl_fstatfs(res, (long)(fd), (long)(buf)) +#define __sanitizer_syscall_pre_fstatfs64(fd, sz, buf) \ + __sanitizer_syscall_pre_impl_fstatfs64((long)(fd), (long)(sz), (long)(buf)) +#define __sanitizer_syscall_post_fstatfs64(res, fd, sz, buf) \ + __sanitizer_syscall_post_impl_fstatfs64(res, (long)(fd), (long)(sz), \ + (long)(buf)) +#define __sanitizer_syscall_pre_lstat(filename, statbuf) \ + 
__sanitizer_syscall_pre_impl_lstat((long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_post_lstat(res, filename, statbuf) \ + __sanitizer_syscall_post_impl_lstat(res, (long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_pre_fstat(fd, statbuf) \ + __sanitizer_syscall_pre_impl_fstat((long)(fd), (long)(statbuf)) +#define __sanitizer_syscall_post_fstat(res, fd, statbuf) \ + __sanitizer_syscall_post_impl_fstat(res, (long)(fd), (long)(statbuf)) +#define __sanitizer_syscall_pre_newstat(filename, statbuf) \ + __sanitizer_syscall_pre_impl_newstat((long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_post_newstat(res, filename, statbuf) \ + __sanitizer_syscall_post_impl_newstat(res, (long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_pre_newlstat(filename, statbuf) \ + __sanitizer_syscall_pre_impl_newlstat((long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_post_newlstat(res, filename, statbuf) \ + __sanitizer_syscall_post_impl_newlstat(res, (long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_pre_newfstat(fd, statbuf) \ + __sanitizer_syscall_pre_impl_newfstat((long)(fd), (long)(statbuf)) +#define __sanitizer_syscall_post_newfstat(res, fd, statbuf) \ + __sanitizer_syscall_post_impl_newfstat(res, (long)(fd), (long)(statbuf)) +#define __sanitizer_syscall_pre_ustat(dev, ubuf) \ + __sanitizer_syscall_pre_impl_ustat((long)(dev), (long)(ubuf)) +#define __sanitizer_syscall_post_ustat(res, dev, ubuf) \ + __sanitizer_syscall_post_impl_ustat(res, (long)(dev), (long)(ubuf)) +#define __sanitizer_syscall_pre_stat64(filename, statbuf) \ + __sanitizer_syscall_pre_impl_stat64((long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_post_stat64(res, filename, statbuf) \ + __sanitizer_syscall_post_impl_stat64(res, (long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_pre_fstat64(fd, statbuf) \ + __sanitizer_syscall_pre_impl_fstat64((long)(fd), (long)(statbuf)) +#define __sanitizer_syscall_post_fstat64(res, 
fd, statbuf) \ + __sanitizer_syscall_post_impl_fstat64(res, (long)(fd), (long)(statbuf)) +#define __sanitizer_syscall_pre_lstat64(filename, statbuf) \ + __sanitizer_syscall_pre_impl_lstat64((long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_post_lstat64(res, filename, statbuf) \ + __sanitizer_syscall_post_impl_lstat64(res, (long)(filename), (long)(statbuf)) +#define __sanitizer_syscall_pre_setxattr(path, name, value, size, flags) \ + __sanitizer_syscall_pre_impl_setxattr( \ + (long)(path), (long)(name), (long)(value), (long)(size), (long)(flags)) +#define __sanitizer_syscall_post_setxattr(res, path, name, value, size, flags) \ + __sanitizer_syscall_post_impl_setxattr(res, (long)(path), (long)(name), \ + (long)(value), (long)(size), \ + (long)(flags)) +#define __sanitizer_syscall_pre_lsetxattr(path, name, value, size, flags) \ + __sanitizer_syscall_pre_impl_lsetxattr( \ + (long)(path), (long)(name), (long)(value), (long)(size), (long)(flags)) +#define __sanitizer_syscall_post_lsetxattr(res, path, name, value, size, \ + flags) \ + __sanitizer_syscall_post_impl_lsetxattr(res, (long)(path), (long)(name), \ + (long)(value), (long)(size), \ + (long)(flags)) +#define __sanitizer_syscall_pre_fsetxattr(fd, name, value, size, flags) \ + __sanitizer_syscall_pre_impl_fsetxattr( \ + (long)(fd), (long)(name), (long)(value), (long)(size), (long)(flags)) +#define __sanitizer_syscall_post_fsetxattr(res, fd, name, value, size, flags) \ + __sanitizer_syscall_post_impl_fsetxattr(res, (long)(fd), (long)(name), \ + (long)(value), (long)(size), \ + (long)(flags)) +#define __sanitizer_syscall_pre_getxattr(path, name, value, size) \ + __sanitizer_syscall_pre_impl_getxattr((long)(path), (long)(name), \ + (long)(value), (long)(size)) +#define __sanitizer_syscall_post_getxattr(res, path, name, value, size) \ + __sanitizer_syscall_post_impl_getxattr(res, (long)(path), (long)(name), \ + (long)(value), (long)(size)) +#define __sanitizer_syscall_pre_lgetxattr(path, name, value, size) 
\ + __sanitizer_syscall_pre_impl_lgetxattr((long)(path), (long)(name), \ + (long)(value), (long)(size)) +#define __sanitizer_syscall_post_lgetxattr(res, path, name, value, size) \ + __sanitizer_syscall_post_impl_lgetxattr(res, (long)(path), (long)(name), \ + (long)(value), (long)(size)) +#define __sanitizer_syscall_pre_fgetxattr(fd, name, value, size) \ + __sanitizer_syscall_pre_impl_fgetxattr((long)(fd), (long)(name), \ + (long)(value), (long)(size)) +#define __sanitizer_syscall_post_fgetxattr(res, fd, name, value, size) \ + __sanitizer_syscall_post_impl_fgetxattr(res, (long)(fd), (long)(name), \ + (long)(value), (long)(size)) +#define __sanitizer_syscall_pre_listxattr(path, list, size) \ + __sanitizer_syscall_pre_impl_listxattr((long)(path), (long)(list), \ + (long)(size)) +#define __sanitizer_syscall_post_listxattr(res, path, list, size) \ + __sanitizer_syscall_post_impl_listxattr(res, (long)(path), (long)(list), \ + (long)(size)) +#define __sanitizer_syscall_pre_llistxattr(path, list, size) \ + __sanitizer_syscall_pre_impl_llistxattr((long)(path), (long)(list), \ + (long)(size)) +#define __sanitizer_syscall_post_llistxattr(res, path, list, size) \ + __sanitizer_syscall_post_impl_llistxattr(res, (long)(path), (long)(list), \ + (long)(size)) +#define __sanitizer_syscall_pre_flistxattr(fd, list, size) \ + __sanitizer_syscall_pre_impl_flistxattr((long)(fd), (long)(list), \ + (long)(size)) +#define __sanitizer_syscall_post_flistxattr(res, fd, list, size) \ + __sanitizer_syscall_post_impl_flistxattr(res, (long)(fd), (long)(list), \ + (long)(size)) +#define __sanitizer_syscall_pre_removexattr(path, name) \ + __sanitizer_syscall_pre_impl_removexattr((long)(path), (long)(name)) +#define __sanitizer_syscall_post_removexattr(res, path, name) \ + __sanitizer_syscall_post_impl_removexattr(res, (long)(path), (long)(name)) +#define __sanitizer_syscall_pre_lremovexattr(path, name) \ + __sanitizer_syscall_pre_impl_lremovexattr((long)(path), (long)(name)) +#define 
__sanitizer_syscall_post_lremovexattr(res, path, name) \ + __sanitizer_syscall_post_impl_lremovexattr(res, (long)(path), (long)(name)) +#define __sanitizer_syscall_pre_fremovexattr(fd, name) \ + __sanitizer_syscall_pre_impl_fremovexattr((long)(fd), (long)(name)) +#define __sanitizer_syscall_post_fremovexattr(res, fd, name) \ + __sanitizer_syscall_post_impl_fremovexattr(res, (long)(fd), (long)(name)) +#define __sanitizer_syscall_pre_brk(brk) \ + __sanitizer_syscall_pre_impl_brk((long)(brk)) +#define __sanitizer_syscall_post_brk(res, brk) \ + __sanitizer_syscall_post_impl_brk(res, (long)(brk)) +#define __sanitizer_syscall_pre_mprotect(start, len, prot) \ + __sanitizer_syscall_pre_impl_mprotect((long)(start), (long)(len), \ + (long)(prot)) +#define __sanitizer_syscall_post_mprotect(res, start, len, prot) \ + __sanitizer_syscall_post_impl_mprotect(res, (long)(start), (long)(len), \ + (long)(prot)) +#define __sanitizer_syscall_pre_mremap(addr, old_len, new_len, flags, \ + new_addr) \ + __sanitizer_syscall_pre_impl_mremap((long)(addr), (long)(old_len), \ + (long)(new_len), (long)(flags), \ + (long)(new_addr)) +#define __sanitizer_syscall_post_mremap(res, addr, old_len, new_len, flags, \ + new_addr) \ + __sanitizer_syscall_post_impl_mremap(res, (long)(addr), (long)(old_len), \ + (long)(new_len), (long)(flags), \ + (long)(new_addr)) +#define __sanitizer_syscall_pre_remap_file_pages(start, size, prot, pgoff, \ + flags) \ + __sanitizer_syscall_pre_impl_remap_file_pages( \ + (long)(start), (long)(size), (long)(prot), (long)(pgoff), (long)(flags)) +#define __sanitizer_syscall_post_remap_file_pages(res, start, size, prot, \ + pgoff, flags) \ + __sanitizer_syscall_post_impl_remap_file_pages(res, (long)(start), \ + (long)(size), (long)(prot), \ + (long)(pgoff), (long)(flags)) +#define __sanitizer_syscall_pre_msync(start, len, flags) \ + __sanitizer_syscall_pre_impl_msync((long)(start), (long)(len), (long)(flags)) +#define __sanitizer_syscall_post_msync(res, start, len, flags) \ + 
__sanitizer_syscall_post_impl_msync(res, (long)(start), (long)(len), \ + (long)(flags)) +#define __sanitizer_syscall_pre_munmap(addr, len) \ + __sanitizer_syscall_pre_impl_munmap((long)(addr), (long)(len)) +#define __sanitizer_syscall_post_munmap(res, addr, len) \ + __sanitizer_syscall_post_impl_munmap(res, (long)(addr), (long)(len)) +#define __sanitizer_syscall_pre_mlock(start, len) \ + __sanitizer_syscall_pre_impl_mlock((long)(start), (long)(len)) +#define __sanitizer_syscall_post_mlock(res, start, len) \ + __sanitizer_syscall_post_impl_mlock(res, (long)(start), (long)(len)) +#define __sanitizer_syscall_pre_munlock(start, len) \ + __sanitizer_syscall_pre_impl_munlock((long)(start), (long)(len)) +#define __sanitizer_syscall_post_munlock(res, start, len) \ + __sanitizer_syscall_post_impl_munlock(res, (long)(start), (long)(len)) +#define __sanitizer_syscall_pre_mlockall(flags) \ + __sanitizer_syscall_pre_impl_mlockall((long)(flags)) +#define __sanitizer_syscall_post_mlockall(res, flags) \ + __sanitizer_syscall_post_impl_mlockall(res, (long)(flags)) +#define __sanitizer_syscall_pre_munlockall() \ + __sanitizer_syscall_pre_impl_munlockall() +#define __sanitizer_syscall_post_munlockall(res) \ + __sanitizer_syscall_post_impl_munlockall(res) +#define __sanitizer_syscall_pre_madvise(start, len, behavior) \ + __sanitizer_syscall_pre_impl_madvise((long)(start), (long)(len), \ + (long)(behavior)) +#define __sanitizer_syscall_post_madvise(res, start, len, behavior) \ + __sanitizer_syscall_post_impl_madvise(res, (long)(start), (long)(len), \ + (long)(behavior)) +#define __sanitizer_syscall_pre_mincore(start, len, vec) \ + __sanitizer_syscall_pre_impl_mincore((long)(start), (long)(len), (long)(vec)) +#define __sanitizer_syscall_post_mincore(res, start, len, vec) \ + __sanitizer_syscall_post_impl_mincore(res, (long)(start), (long)(len), \ + (long)(vec)) +#define __sanitizer_syscall_pre_pivot_root(new_root, put_old) \ + __sanitizer_syscall_pre_impl_pivot_root((long)(new_root), 
(long)(put_old)) +#define __sanitizer_syscall_post_pivot_root(res, new_root, put_old) \ + __sanitizer_syscall_post_impl_pivot_root(res, (long)(new_root), \ + (long)(put_old)) +#define __sanitizer_syscall_pre_chroot(filename) \ + __sanitizer_syscall_pre_impl_chroot((long)(filename)) +#define __sanitizer_syscall_post_chroot(res, filename) \ + __sanitizer_syscall_post_impl_chroot(res, (long)(filename)) +#define __sanitizer_syscall_pre_mknod(filename, mode, dev) \ + __sanitizer_syscall_pre_impl_mknod((long)(filename), (long)(mode), \ + (long)(dev)) +#define __sanitizer_syscall_post_mknod(res, filename, mode, dev) \ + __sanitizer_syscall_post_impl_mknod(res, (long)(filename), (long)(mode), \ + (long)(dev)) +#define __sanitizer_syscall_pre_link(oldname, newname) \ + __sanitizer_syscall_pre_impl_link((long)(oldname), (long)(newname)) +#define __sanitizer_syscall_post_link(res, oldname, newname) \ + __sanitizer_syscall_post_impl_link(res, (long)(oldname), (long)(newname)) +#define __sanitizer_syscall_pre_symlink(old, new_) \ + __sanitizer_syscall_pre_impl_symlink((long)(old), (long)(new_)) +#define __sanitizer_syscall_post_symlink(res, old, new_) \ + __sanitizer_syscall_post_impl_symlink(res, (long)(old), (long)(new_)) +#define __sanitizer_syscall_pre_unlink(pathname) \ + __sanitizer_syscall_pre_impl_unlink((long)(pathname)) +#define __sanitizer_syscall_post_unlink(res, pathname) \ + __sanitizer_syscall_post_impl_unlink(res, (long)(pathname)) +#define __sanitizer_syscall_pre_rename(oldname, newname) \ + __sanitizer_syscall_pre_impl_rename((long)(oldname), (long)(newname)) +#define __sanitizer_syscall_post_rename(res, oldname, newname) \ + __sanitizer_syscall_post_impl_rename(res, (long)(oldname), (long)(newname)) +#define __sanitizer_syscall_pre_chmod(filename, mode) \ + __sanitizer_syscall_pre_impl_chmod((long)(filename), (long)(mode)) +#define __sanitizer_syscall_post_chmod(res, filename, mode) \ + __sanitizer_syscall_post_impl_chmod(res, (long)(filename), (long)(mode)) 
+#define __sanitizer_syscall_pre_fchmod(fd, mode) \ + __sanitizer_syscall_pre_impl_fchmod((long)(fd), (long)(mode)) +#define __sanitizer_syscall_post_fchmod(res, fd, mode) \ + __sanitizer_syscall_post_impl_fchmod(res, (long)(fd), (long)(mode)) +#define __sanitizer_syscall_pre_fcntl(fd, cmd, arg) \ + __sanitizer_syscall_pre_impl_fcntl((long)(fd), (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_post_fcntl(res, fd, cmd, arg) \ + __sanitizer_syscall_post_impl_fcntl(res, (long)(fd), (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_pre_fcntl64(fd, cmd, arg) \ + __sanitizer_syscall_pre_impl_fcntl64((long)(fd), (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_post_fcntl64(res, fd, cmd, arg) \ + __sanitizer_syscall_post_impl_fcntl64(res, (long)(fd), (long)(cmd), \ + (long)(arg)) +#define __sanitizer_syscall_pre_pipe(fildes) \ + __sanitizer_syscall_pre_impl_pipe((long)(fildes)) +#define __sanitizer_syscall_post_pipe(res, fildes) \ + __sanitizer_syscall_post_impl_pipe(res, (long)(fildes)) +#define __sanitizer_syscall_pre_pipe2(fildes, flags) \ + __sanitizer_syscall_pre_impl_pipe2((long)(fildes), (long)(flags)) +#define __sanitizer_syscall_post_pipe2(res, fildes, flags) \ + __sanitizer_syscall_post_impl_pipe2(res, (long)(fildes), (long)(flags)) +#define __sanitizer_syscall_pre_dup(fildes) \ + __sanitizer_syscall_pre_impl_dup((long)(fildes)) +#define __sanitizer_syscall_post_dup(res, fildes) \ + __sanitizer_syscall_post_impl_dup(res, (long)(fildes)) +#define __sanitizer_syscall_pre_dup2(oldfd, newfd) \ + __sanitizer_syscall_pre_impl_dup2((long)(oldfd), (long)(newfd)) +#define __sanitizer_syscall_post_dup2(res, oldfd, newfd) \ + __sanitizer_syscall_post_impl_dup2(res, (long)(oldfd), (long)(newfd)) +#define __sanitizer_syscall_pre_dup3(oldfd, newfd, flags) \ + __sanitizer_syscall_pre_impl_dup3((long)(oldfd), (long)(newfd), (long)(flags)) +#define __sanitizer_syscall_post_dup3(res, oldfd, newfd, flags) \ + __sanitizer_syscall_post_impl_dup3(res, (long)(oldfd), 
(long)(newfd), \ + (long)(flags)) +#define __sanitizer_syscall_pre_ioperm(from, num, on) \ + __sanitizer_syscall_pre_impl_ioperm((long)(from), (long)(num), (long)(on)) +#define __sanitizer_syscall_post_ioperm(res, from, num, on) \ + __sanitizer_syscall_post_impl_ioperm(res, (long)(from), (long)(num), \ + (long)(on)) +#define __sanitizer_syscall_pre_ioctl(fd, cmd, arg) \ + __sanitizer_syscall_pre_impl_ioctl((long)(fd), (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_post_ioctl(res, fd, cmd, arg) \ + __sanitizer_syscall_post_impl_ioctl(res, (long)(fd), (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_pre_flock(fd, cmd) \ + __sanitizer_syscall_pre_impl_flock((long)(fd), (long)(cmd)) +#define __sanitizer_syscall_post_flock(res, fd, cmd) \ + __sanitizer_syscall_post_impl_flock(res, (long)(fd), (long)(cmd)) +#define __sanitizer_syscall_pre_io_setup(nr_reqs, ctx) \ + __sanitizer_syscall_pre_impl_io_setup((long)(nr_reqs), (long)(ctx)) +#define __sanitizer_syscall_post_io_setup(res, nr_reqs, ctx) \ + __sanitizer_syscall_post_impl_io_setup(res, (long)(nr_reqs), (long)(ctx)) +#define __sanitizer_syscall_pre_io_destroy(ctx) \ + __sanitizer_syscall_pre_impl_io_destroy((long)(ctx)) +#define __sanitizer_syscall_post_io_destroy(res, ctx) \ + __sanitizer_syscall_post_impl_io_destroy(res, (long)(ctx)) +#define __sanitizer_syscall_pre_io_getevents(ctx_id, min_nr, nr, events, \ + timeout) \ + __sanitizer_syscall_pre_impl_io_getevents((long)(ctx_id), (long)(min_nr), \ + (long)(nr), (long)(events), \ + (long)(timeout)) +#define __sanitizer_syscall_post_io_getevents(res, ctx_id, min_nr, nr, events, \ + timeout) \ + __sanitizer_syscall_post_impl_io_getevents(res, (long)(ctx_id), \ + (long)(min_nr), (long)(nr), \ + (long)(events), (long)(timeout)) +#define __sanitizer_syscall_pre_io_submit(ctx_id, arg1, arg2) \ + __sanitizer_syscall_pre_impl_io_submit((long)(ctx_id), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_post_io_submit(res, ctx_id, arg1, arg2) \ + 
__sanitizer_syscall_post_impl_io_submit(res, (long)(ctx_id), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_io_cancel(ctx_id, iocb, result) \ + __sanitizer_syscall_pre_impl_io_cancel((long)(ctx_id), (long)(iocb), \ + (long)(result)) +#define __sanitizer_syscall_post_io_cancel(res, ctx_id, iocb, result) \ + __sanitizer_syscall_post_impl_io_cancel(res, (long)(ctx_id), (long)(iocb), \ + (long)(result)) +#define __sanitizer_syscall_pre_sendfile(out_fd, in_fd, offset, count) \ + __sanitizer_syscall_pre_impl_sendfile((long)(out_fd), (long)(in_fd), \ + (long)(offset), (long)(count)) +#define __sanitizer_syscall_post_sendfile(res, out_fd, in_fd, offset, count) \ + __sanitizer_syscall_post_impl_sendfile(res, (long)(out_fd), (long)(in_fd), \ + (long)(offset), (long)(count)) +#define __sanitizer_syscall_pre_sendfile64(out_fd, in_fd, offset, count) \ + __sanitizer_syscall_pre_impl_sendfile64((long)(out_fd), (long)(in_fd), \ + (long)(offset), (long)(count)) +#define __sanitizer_syscall_post_sendfile64(res, out_fd, in_fd, offset, count) \ + __sanitizer_syscall_post_impl_sendfile64(res, (long)(out_fd), (long)(in_fd), \ + (long)(offset), (long)(count)) +#define __sanitizer_syscall_pre_readlink(path, buf, bufsiz) \ + __sanitizer_syscall_pre_impl_readlink((long)(path), (long)(buf), \ + (long)(bufsiz)) +#define __sanitizer_syscall_post_readlink(res, path, buf, bufsiz) \ + __sanitizer_syscall_post_impl_readlink(res, (long)(path), (long)(buf), \ + (long)(bufsiz)) +#define __sanitizer_syscall_pre_creat(pathname, mode) \ + __sanitizer_syscall_pre_impl_creat((long)(pathname), (long)(mode)) +#define __sanitizer_syscall_post_creat(res, pathname, mode) \ + __sanitizer_syscall_post_impl_creat(res, (long)(pathname), (long)(mode)) +#define __sanitizer_syscall_pre_open(filename, flags, mode) \ + __sanitizer_syscall_pre_impl_open((long)(filename), (long)(flags), \ + (long)(mode)) +#define __sanitizer_syscall_post_open(res, filename, flags, mode) \ + 
__sanitizer_syscall_post_impl_open(res, (long)(filename), (long)(flags), \ + (long)(mode)) +#define __sanitizer_syscall_pre_close(fd) \ + __sanitizer_syscall_pre_impl_close((long)(fd)) +#define __sanitizer_syscall_post_close(res, fd) \ + __sanitizer_syscall_post_impl_close(res, (long)(fd)) +#define __sanitizer_syscall_pre_access(filename, mode) \ + __sanitizer_syscall_pre_impl_access((long)(filename), (long)(mode)) +#define __sanitizer_syscall_post_access(res, filename, mode) \ + __sanitizer_syscall_post_impl_access(res, (long)(filename), (long)(mode)) +#define __sanitizer_syscall_pre_vhangup() __sanitizer_syscall_pre_impl_vhangup() +#define __sanitizer_syscall_post_vhangup(res) \ + __sanitizer_syscall_post_impl_vhangup(res) +#define __sanitizer_syscall_pre_chown(filename, user, group) \ + __sanitizer_syscall_pre_impl_chown((long)(filename), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_post_chown(res, filename, user, group) \ + __sanitizer_syscall_post_impl_chown(res, (long)(filename), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_pre_lchown(filename, user, group) \ + __sanitizer_syscall_pre_impl_lchown((long)(filename), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_post_lchown(res, filename, user, group) \ + __sanitizer_syscall_post_impl_lchown(res, (long)(filename), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_pre_fchown(fd, user, group) \ + __sanitizer_syscall_pre_impl_fchown((long)(fd), (long)(user), (long)(group)) +#define __sanitizer_syscall_post_fchown(res, fd, user, group) \ + __sanitizer_syscall_post_impl_fchown(res, (long)(fd), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_pre_chown16(filename, user, group) \ + __sanitizer_syscall_pre_impl_chown16((long)(filename), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_post_chown16(res, filename, user, group) \ + __sanitizer_syscall_post_impl_chown16(res, (long)(filename), (long)(user), \ + (long)(group)) +#define 
__sanitizer_syscall_pre_lchown16(filename, user, group) \ + __sanitizer_syscall_pre_impl_lchown16((long)(filename), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_post_lchown16(res, filename, user, group) \ + __sanitizer_syscall_post_impl_lchown16(res, (long)(filename), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_pre_fchown16(fd, user, group) \ + __sanitizer_syscall_pre_impl_fchown16((long)(fd), (long)(user), (long)(group)) +#define __sanitizer_syscall_post_fchown16(res, fd, user, group) \ + __sanitizer_syscall_post_impl_fchown16(res, (long)(fd), (long)(user), \ + (long)(group)) +#define __sanitizer_syscall_pre_setregid16(rgid, egid) \ + __sanitizer_syscall_pre_impl_setregid16((long)(rgid), (long)(egid)) +#define __sanitizer_syscall_post_setregid16(res, rgid, egid) \ + __sanitizer_syscall_post_impl_setregid16(res, (long)(rgid), (long)(egid)) +#define __sanitizer_syscall_pre_setgid16(gid) \ + __sanitizer_syscall_pre_impl_setgid16((long)(gid)) +#define __sanitizer_syscall_post_setgid16(res, gid) \ + __sanitizer_syscall_post_impl_setgid16(res, (long)(gid)) +#define __sanitizer_syscall_pre_setreuid16(ruid, euid) \ + __sanitizer_syscall_pre_impl_setreuid16((long)(ruid), (long)(euid)) +#define __sanitizer_syscall_post_setreuid16(res, ruid, euid) \ + __sanitizer_syscall_post_impl_setreuid16(res, (long)(ruid), (long)(euid)) +#define __sanitizer_syscall_pre_setuid16(uid) \ + __sanitizer_syscall_pre_impl_setuid16((long)(uid)) +#define __sanitizer_syscall_post_setuid16(res, uid) \ + __sanitizer_syscall_post_impl_setuid16(res, (long)(uid)) +#define __sanitizer_syscall_pre_setresuid16(ruid, euid, suid) \ + __sanitizer_syscall_pre_impl_setresuid16((long)(ruid), (long)(euid), (long)(suid)) +#define __sanitizer_syscall_post_setresuid16(res, ruid, euid, suid) \ + __sanitizer_syscall_post_impl_setresuid16(res, (long)(ruid), (long)(euid), \ + (long)(suid)) +#define __sanitizer_syscall_pre_getresuid16(ruid, euid, suid) \ + __sanitizer_syscall_pre_impl_getresuid16((long)(ruid), (long)(euid), \ + 
(long)(suid)) +#define __sanitizer_syscall_post_getresuid16(res, ruid, euid, suid) \ + __sanitizer_syscall_post_impl_getresuid16(res, (long)(ruid), (long)(euid), \ + (long)(suid)) +#define __sanitizer_syscall_pre_setresgid16(rgid, egid, sgid) \ + __sanitizer_syscall_pre_impl_setresgid16((long)(rgid), (long)(egid), (long)(sgid)) +#define __sanitizer_syscall_post_setresgid16(res, rgid, egid, sgid) \ + __sanitizer_syscall_post_impl_setresgid16(res, (long)(rgid), (long)(egid), \ + (long)(sgid)) +#define __sanitizer_syscall_pre_getresgid16(rgid, egid, sgid) \ + __sanitizer_syscall_pre_impl_getresgid16((long)(rgid), (long)(egid), \ + (long)(sgid)) +#define __sanitizer_syscall_post_getresgid16(res, rgid, egid, sgid) \ + __sanitizer_syscall_post_impl_getresgid16(res, (long)(rgid), (long)(egid), \ + (long)(sgid)) +#define __sanitizer_syscall_pre_setfsuid16(uid) \ + __sanitizer_syscall_pre_impl_setfsuid16((long)(uid)) +#define __sanitizer_syscall_post_setfsuid16(res, uid) \ + __sanitizer_syscall_post_impl_setfsuid16(res, (long)(uid)) +#define __sanitizer_syscall_pre_setfsgid16(gid) \ + __sanitizer_syscall_pre_impl_setfsgid16((long)(gid)) +#define __sanitizer_syscall_post_setfsgid16(res, gid) \ + __sanitizer_syscall_post_impl_setfsgid16(res, (long)(gid)) +#define __sanitizer_syscall_pre_getgroups16(gidsetsize, grouplist) \ + __sanitizer_syscall_pre_impl_getgroups16((long)(gidsetsize), \ + (long)(grouplist)) +#define __sanitizer_syscall_post_getgroups16(res, gidsetsize, grouplist) \ + __sanitizer_syscall_post_impl_getgroups16(res, (long)(gidsetsize), \ + (long)(grouplist)) +#define __sanitizer_syscall_pre_setgroups16(gidsetsize, grouplist) \ + __sanitizer_syscall_pre_impl_setgroups16((long)(gidsetsize), \ + (long)(grouplist)) +#define __sanitizer_syscall_post_setgroups16(res, gidsetsize, grouplist) \ + __sanitizer_syscall_post_impl_setgroups16(res, (long)(gidsetsize), \ + (long)(grouplist)) +#define __sanitizer_syscall_pre_getuid16() \ + __sanitizer_syscall_pre_impl_getuid16() +#define 
__sanitizer_syscall_post_getuid16(res) \ + __sanitizer_syscall_post_impl_getuid16(res) +#define __sanitizer_syscall_pre_geteuid16() \ + __sanitizer_syscall_pre_impl_geteuid16() +#define __sanitizer_syscall_post_geteuid16(res) \ + __sanitizer_syscall_post_impl_geteuid16(res) +#define __sanitizer_syscall_pre_getgid16() \ + __sanitizer_syscall_pre_impl_getgid16() +#define __sanitizer_syscall_post_getgid16(res) \ + __sanitizer_syscall_post_impl_getgid16(res) +#define __sanitizer_syscall_pre_getegid16() \ + __sanitizer_syscall_pre_impl_getegid16() +#define __sanitizer_syscall_post_getegid16(res) \ + __sanitizer_syscall_post_impl_getegid16(res) +#define __sanitizer_syscall_pre_utime(filename, times) \ + __sanitizer_syscall_pre_impl_utime((long)(filename), (long)(times)) +#define __sanitizer_syscall_post_utime(res, filename, times) \ + __sanitizer_syscall_post_impl_utime(res, (long)(filename), (long)(times)) +#define __sanitizer_syscall_pre_utimes(filename, utimes) \ + __sanitizer_syscall_pre_impl_utimes((long)(filename), (long)(utimes)) +#define __sanitizer_syscall_post_utimes(res, filename, utimes) \ + __sanitizer_syscall_post_impl_utimes(res, (long)(filename), (long)(utimes)) +#define __sanitizer_syscall_pre_lseek(fd, offset, origin) \ + __sanitizer_syscall_pre_impl_lseek((long)(fd), (long)(offset), (long)(origin)) +#define __sanitizer_syscall_post_lseek(res, fd, offset, origin) \ + __sanitizer_syscall_post_impl_lseek(res, (long)(fd), (long)(offset), \ + (long)(origin)) +#define __sanitizer_syscall_pre_llseek(fd, offset_high, offset_low, result, \ + origin) \ + __sanitizer_syscall_pre_impl_llseek((long)(fd), (long)(offset_high), \ + (long)(offset_low), (long)(result), \ + (long)(origin)) +#define __sanitizer_syscall_post_llseek(res, fd, offset_high, offset_low, \ + result, origin) \ + __sanitizer_syscall_post_impl_llseek(res, (long)(fd), (long)(offset_high), \ + (long)(offset_low), (long)(result), \ + (long)(origin)) +#define __sanitizer_syscall_pre_read(fd, buf, 
count) \ + __sanitizer_syscall_pre_impl_read((long)(fd), (long)(buf), (long)(count)) +#define __sanitizer_syscall_post_read(res, fd, buf, count) \ + __sanitizer_syscall_post_impl_read(res, (long)(fd), (long)(buf), \ + (long)(count)) +#define __sanitizer_syscall_pre_readv(fd, vec, vlen) \ + __sanitizer_syscall_pre_impl_readv((long)(fd), (long)(vec), (long)(vlen)) +#define __sanitizer_syscall_post_readv(res, fd, vec, vlen) \ + __sanitizer_syscall_post_impl_readv(res, (long)(fd), (long)(vec), \ + (long)(vlen)) +#define __sanitizer_syscall_pre_write(fd, buf, count) \ + __sanitizer_syscall_pre_impl_write((long)(fd), (long)(buf), (long)(count)) +#define __sanitizer_syscall_post_write(res, fd, buf, count) \ + __sanitizer_syscall_post_impl_write(res, (long)(fd), (long)(buf), \ + (long)(count)) +#define __sanitizer_syscall_pre_writev(fd, vec, vlen) \ + __sanitizer_syscall_pre_impl_writev((long)(fd), (long)(vec), (long)(vlen)) +#define __sanitizer_syscall_post_writev(res, fd, vec, vlen) \ + __sanitizer_syscall_post_impl_writev(res, (long)(fd), (long)(vec), \ + (long)(vlen)) + +#ifdef _LP64 +#define __sanitizer_syscall_pre_pread64(fd, buf, count, pos) \ + __sanitizer_syscall_pre_impl_pread64((long)(fd), (long)(buf), (long)(count), \ + (long)(pos)) +#define __sanitizer_syscall_post_pread64(res, fd, buf, count, pos) \ + __sanitizer_syscall_post_impl_pread64(res, (long)(fd), (long)(buf), \ + (long)(count), (long)(pos)) +#define __sanitizer_syscall_pre_pwrite64(fd, buf, count, pos) \ + __sanitizer_syscall_pre_impl_pwrite64((long)(fd), (long)(buf), \ + (long)(count), (long)(pos)) +#define __sanitizer_syscall_post_pwrite64(res, fd, buf, count, pos) \ + __sanitizer_syscall_post_impl_pwrite64(res, (long)(fd), (long)(buf), \ + (long)(count), (long)(pos)) +#else +#define __sanitizer_syscall_pre_pread64(fd, buf, count, pos0, pos1) \ + __sanitizer_syscall_pre_impl_pread64((long)(fd), (long)(buf), (long)(count), \ + (long)(pos0), (long)(pos1)) +#define 
__sanitizer_syscall_post_pread64(res, fd, buf, count, pos0, pos1) \ + __sanitizer_syscall_post_impl_pread64(res, (long)(fd), (long)(buf), \ + (long)(count), (long)(pos0), \ + (long)(pos1)) +#define __sanitizer_syscall_pre_pwrite64(fd, buf, count, pos0, pos1) \ + __sanitizer_syscall_pre_impl_pwrite64( \ + (long)(fd), (long)(buf), (long)(count), (long)(pos0), (long)(pos1)) +#define __sanitizer_syscall_post_pwrite64(res, fd, buf, count, pos0, pos1) \ + __sanitizer_syscall_post_impl_pwrite64( \ + res, (long)(fd), (long)(buf), (long)(count), (long)(pos0), (long)(pos1)) +#endif + +#define __sanitizer_syscall_pre_preadv(fd, vec, vlen, pos_l, pos_h) \ + __sanitizer_syscall_pre_impl_preadv((long)(fd), (long)(vec), (long)(vlen), \ + (long)(pos_l), (long)(pos_h)) +#define __sanitizer_syscall_post_preadv(res, fd, vec, vlen, pos_l, pos_h) \ + __sanitizer_syscall_post_impl_preadv(res, (long)(fd), (long)(vec), \ + (long)(vlen), (long)(pos_l), \ + (long)(pos_h)) +#define __sanitizer_syscall_pre_pwritev(fd, vec, vlen, pos_l, pos_h) \ + __sanitizer_syscall_pre_impl_pwritev((long)(fd), (long)(vec), (long)(vlen), \ + (long)(pos_l), (long)(pos_h)) +#define __sanitizer_syscall_post_pwritev(res, fd, vec, vlen, pos_l, pos_h) \ + __sanitizer_syscall_post_impl_pwritev(res, (long)(fd), (long)(vec), \ + (long)(vlen), (long)(pos_l), \ + (long)(pos_h)) +#define __sanitizer_syscall_pre_getcwd(buf, size) \ + __sanitizer_syscall_pre_impl_getcwd((long)(buf), (long)(size)) +#define __sanitizer_syscall_post_getcwd(res, buf, size) \ + __sanitizer_syscall_post_impl_getcwd(res, (long)(buf), (long)(size)) +#define __sanitizer_syscall_pre_mkdir(pathname, mode) \ + __sanitizer_syscall_pre_impl_mkdir((long)(pathname), (long)(mode)) +#define __sanitizer_syscall_post_mkdir(res, pathname, mode) \ + __sanitizer_syscall_post_impl_mkdir(res, (long)(pathname), (long)(mode)) +#define __sanitizer_syscall_pre_chdir(filename) \ + __sanitizer_syscall_pre_impl_chdir((long)(filename)) +#define 
__sanitizer_syscall_post_chdir(res, filename) \ + __sanitizer_syscall_post_impl_chdir(res, (long)(filename)) +#define __sanitizer_syscall_pre_fchdir(fd) \ + __sanitizer_syscall_pre_impl_fchdir((long)(fd)) +#define __sanitizer_syscall_post_fchdir(res, fd) \ + __sanitizer_syscall_post_impl_fchdir(res, (long)(fd)) +#define __sanitizer_syscall_pre_rmdir(pathname) \ + __sanitizer_syscall_pre_impl_rmdir((long)(pathname)) +#define __sanitizer_syscall_post_rmdir(res, pathname) \ + __sanitizer_syscall_post_impl_rmdir(res, (long)(pathname)) +#define __sanitizer_syscall_pre_lookup_dcookie(cookie64, buf, len) \ + __sanitizer_syscall_pre_impl_lookup_dcookie((long)(cookie64), (long)(buf), \ + (long)(len)) +#define __sanitizer_syscall_post_lookup_dcookie(res, cookie64, buf, len) \ + __sanitizer_syscall_post_impl_lookup_dcookie(res, (long)(cookie64), \ + (long)(buf), (long)(len)) +#define __sanitizer_syscall_pre_quotactl(cmd, special, id, addr) \ + __sanitizer_syscall_pre_impl_quotactl((long)(cmd), (long)(special), \ + (long)(id), (long)(addr)) +#define __sanitizer_syscall_post_quotactl(res, cmd, special, id, addr) \ + __sanitizer_syscall_post_impl_quotactl(res, (long)(cmd), (long)(special), \ + (long)(id), (long)(addr)) +#define __sanitizer_syscall_pre_getdents(fd, dirent, count) \ + __sanitizer_syscall_pre_impl_getdents((long)(fd), (long)(dirent), \ + (long)(count)) +#define __sanitizer_syscall_post_getdents(res, fd, dirent, count) \ + __sanitizer_syscall_post_impl_getdents(res, (long)(fd), (long)(dirent), \ + (long)(count)) +#define __sanitizer_syscall_pre_getdents64(fd, dirent, count) \ + __sanitizer_syscall_pre_impl_getdents64((long)(fd), (long)(dirent), \ + (long)(count)) +#define __sanitizer_syscall_post_getdents64(res, fd, dirent, count) \ + __sanitizer_syscall_post_impl_getdents64(res, (long)(fd), (long)(dirent), \ + (long)(count)) +#define __sanitizer_syscall_pre_setsockopt(fd, level, optname, optval, optlen) \ + __sanitizer_syscall_pre_impl_setsockopt((long)(fd), 
(long)(level), \ + (long)(optname), (long)(optval), \ + (long)(optlen)) +#define __sanitizer_syscall_post_setsockopt(res, fd, level, optname, optval, \ + optlen) \ + __sanitizer_syscall_post_impl_setsockopt(res, (long)(fd), (long)(level), \ + (long)(optname), (long)(optval), \ + (long)(optlen)) +#define __sanitizer_syscall_pre_getsockopt(fd, level, optname, optval, optlen) \ + __sanitizer_syscall_pre_impl_getsockopt((long)(fd), (long)(level), \ + (long)(optname), (long)(optval), \ + (long)(optlen)) +#define __sanitizer_syscall_post_getsockopt(res, fd, level, optname, optval, \ + optlen) \ + __sanitizer_syscall_post_impl_getsockopt(res, (long)(fd), (long)(level), \ + (long)(optname), (long)(optval), \ + (long)(optlen)) +#define __sanitizer_syscall_pre_bind(arg0, arg1, arg2) \ + __sanitizer_syscall_pre_impl_bind((long)(arg0), (long)(arg1), (long)(arg2)) +#define __sanitizer_syscall_post_bind(res, arg0, arg1, arg2) \ + __sanitizer_syscall_post_impl_bind(res, (long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_connect(arg0, arg1, arg2) \ + __sanitizer_syscall_pre_impl_connect((long)(arg0), (long)(arg1), (long)(arg2)) +#define __sanitizer_syscall_post_connect(res, arg0, arg1, arg2) \ + __sanitizer_syscall_post_impl_connect(res, (long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_accept(arg0, arg1, arg2) \ + __sanitizer_syscall_pre_impl_accept((long)(arg0), (long)(arg1), (long)(arg2)) +#define __sanitizer_syscall_post_accept(res, arg0, arg1, arg2) \ + __sanitizer_syscall_post_impl_accept(res, (long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_accept4(arg0, arg1, arg2, arg3) \ + __sanitizer_syscall_pre_impl_accept4((long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3)) +#define __sanitizer_syscall_post_accept4(res, arg0, arg1, arg2, arg3) \ + __sanitizer_syscall_post_impl_accept4(res, (long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3)) +#define 
__sanitizer_syscall_pre_getsockname(arg0, arg1, arg2) \ + __sanitizer_syscall_pre_impl_getsockname((long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_post_getsockname(res, arg0, arg1, arg2) \ + __sanitizer_syscall_post_impl_getsockname(res, (long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_getpeername(arg0, arg1, arg2) \ + __sanitizer_syscall_pre_impl_getpeername((long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_post_getpeername(res, arg0, arg1, arg2) \ + __sanitizer_syscall_post_impl_getpeername(res, (long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_send(arg0, arg1, arg2, arg3) \ + __sanitizer_syscall_pre_impl_send((long)(arg0), (long)(arg1), (long)(arg2), \ + (long)(arg3)) +#define __sanitizer_syscall_post_send(res, arg0, arg1, arg2, arg3) \ + __sanitizer_syscall_post_impl_send(res, (long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3)) +#define __sanitizer_syscall_pre_sendto(arg0, arg1, arg2, arg3, arg4, arg5) \ + __sanitizer_syscall_pre_impl_sendto((long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3), \ + (long)(arg4), (long)(arg5)) +#define __sanitizer_syscall_post_sendto(res, arg0, arg1, arg2, arg3, arg4, \ + arg5) \ + __sanitizer_syscall_post_impl_sendto(res, (long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3), \ + (long)(arg4), (long)(arg5)) +#define __sanitizer_syscall_pre_sendmsg(fd, msg, flags) \ + __sanitizer_syscall_pre_impl_sendmsg((long)(fd), (long)(msg), (long)(flags)) +#define __sanitizer_syscall_post_sendmsg(res, fd, msg, flags) \ + __sanitizer_syscall_post_impl_sendmsg(res, (long)(fd), (long)(msg), \ + (long)(flags)) +#define __sanitizer_syscall_pre_sendmmsg(fd, msg, vlen, flags) \ + __sanitizer_syscall_pre_impl_sendmmsg((long)(fd), (long)(msg), (long)(vlen), \ + (long)(flags)) +#define __sanitizer_syscall_post_sendmmsg(res, fd, msg, vlen, flags) \ + __sanitizer_syscall_post_impl_sendmmsg(res, (long)(fd), (long)(msg), \ + 
(long)(vlen), (long)(flags)) +#define __sanitizer_syscall_pre_recv(arg0, arg1, arg2, arg3) \ + __sanitizer_syscall_pre_impl_recv((long)(arg0), (long)(arg1), (long)(arg2), \ + (long)(arg3)) +#define __sanitizer_syscall_post_recv(res, arg0, arg1, arg2, arg3) \ + __sanitizer_syscall_post_impl_recv(res, (long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3)) +#define __sanitizer_syscall_pre_recvfrom(arg0, arg1, arg2, arg3, arg4, arg5) \ + __sanitizer_syscall_pre_impl_recvfrom((long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3), \ + (long)(arg4), (long)(arg5)) +#define __sanitizer_syscall_post_recvfrom(res, arg0, arg1, arg2, arg3, arg4, \ + arg5) \ + __sanitizer_syscall_post_impl_recvfrom(res, (long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3), \ + (long)(arg4), (long)(arg5)) +#define __sanitizer_syscall_pre_recvmsg(fd, msg, flags) \ + __sanitizer_syscall_pre_impl_recvmsg((long)(fd), (long)(msg), (long)(flags)) +#define __sanitizer_syscall_post_recvmsg(res, fd, msg, flags) \ + __sanitizer_syscall_post_impl_recvmsg(res, (long)(fd), (long)(msg), \ + (long)(flags)) +#define __sanitizer_syscall_pre_recvmmsg(fd, msg, vlen, flags, timeout) \ + __sanitizer_syscall_pre_impl_recvmmsg((long)(fd), (long)(msg), (long)(vlen), \ + (long)(flags), (long)(timeout)) +#define __sanitizer_syscall_post_recvmmsg(res, fd, msg, vlen, flags, timeout) \ + __sanitizer_syscall_post_impl_recvmmsg(res, (long)(fd), (long)(msg), \ + (long)(vlen), (long)(flags), \ + (long)(timeout)) +#define __sanitizer_syscall_pre_socket(arg0, arg1, arg2) \ + __sanitizer_syscall_pre_impl_socket((long)(arg0), (long)(arg1), (long)(arg2)) +#define __sanitizer_syscall_post_socket(res, arg0, arg1, arg2) \ + __sanitizer_syscall_post_impl_socket(res, (long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_socketpair(arg0, arg1, arg2, arg3) \ + __sanitizer_syscall_pre_impl_socketpair((long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3)) +#define 
__sanitizer_syscall_post_socketpair(res, arg0, arg1, arg2, arg3) \ + __sanitizer_syscall_post_impl_socketpair(res, (long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3)) +#define __sanitizer_syscall_pre_socketcall(call, args) \ + __sanitizer_syscall_pre_impl_socketcall((long)(call), (long)(args)) +#define __sanitizer_syscall_post_socketcall(res, call, args) \ + __sanitizer_syscall_post_impl_socketcall(res, (long)(call), (long)(args)) +#define __sanitizer_syscall_pre_listen(arg0, arg1) \ + __sanitizer_syscall_pre_impl_listen((long)(arg0), (long)(arg1)) +#define __sanitizer_syscall_post_listen(res, arg0, arg1) \ + __sanitizer_syscall_post_impl_listen(res, (long)(arg0), (long)(arg1)) +#define __sanitizer_syscall_pre_poll(ufds, nfds, timeout) \ + __sanitizer_syscall_pre_impl_poll((long)(ufds), (long)(nfds), (long)(timeout)) +#define __sanitizer_syscall_post_poll(res, ufds, nfds, timeout) \ + __sanitizer_syscall_post_impl_poll(res, (long)(ufds), (long)(nfds), \ + (long)(timeout)) +#define __sanitizer_syscall_pre_select(n, inp, outp, exp, tvp) \ + __sanitizer_syscall_pre_impl_select((long)(n), (long)(inp), (long)(outp), \ + (long)(exp), (long)(tvp)) +#define __sanitizer_syscall_post_select(res, n, inp, outp, exp, tvp) \ + __sanitizer_syscall_post_impl_select(res, (long)(n), (long)(inp), \ + (long)(outp), (long)(exp), (long)(tvp)) +#define __sanitizer_syscall_pre_old_select(arg) \ + __sanitizer_syscall_pre_impl_old_select((long)(arg)) +#define __sanitizer_syscall_post_old_select(res, arg) \ + __sanitizer_syscall_post_impl_old_select(res, (long)(arg)) +#define __sanitizer_syscall_pre_epoll_create(size) \ + __sanitizer_syscall_pre_impl_epoll_create((long)(size)) +#define __sanitizer_syscall_post_epoll_create(res, size) \ + __sanitizer_syscall_post_impl_epoll_create(res, (long)(size)) +#define __sanitizer_syscall_pre_epoll_create1(flags) \ + __sanitizer_syscall_pre_impl_epoll_create1((long)(flags)) +#define __sanitizer_syscall_post_epoll_create1(res, flags) \ + 
__sanitizer_syscall_post_impl_epoll_create1(res, (long)(flags)) +#define __sanitizer_syscall_pre_epoll_ctl(epfd, op, fd, event) \ + __sanitizer_syscall_pre_impl_epoll_ctl((long)(epfd), (long)(op), (long)(fd), \ + (long)(event)) +#define __sanitizer_syscall_post_epoll_ctl(res, epfd, op, fd, event) \ + __sanitizer_syscall_post_impl_epoll_ctl(res, (long)(epfd), (long)(op), \ + (long)(fd), (long)(event)) +#define __sanitizer_syscall_pre_epoll_wait(epfd, events, maxevents, timeout) \ + __sanitizer_syscall_pre_impl_epoll_wait((long)(epfd), (long)(events), \ + (long)(maxevents), (long)(timeout)) +#define __sanitizer_syscall_post_epoll_wait(res, epfd, events, maxevents, \ + timeout) \ + __sanitizer_syscall_post_impl_epoll_wait(res, (long)(epfd), (long)(events), \ + (long)(maxevents), (long)(timeout)) +#define __sanitizer_syscall_pre_epoll_pwait(epfd, events, maxevents, timeout, \ + sigmask, sigsetsize) \ + __sanitizer_syscall_pre_impl_epoll_pwait( \ + (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \ + (long)(sigmask), (long)(sigsetsize)) +#define __sanitizer_syscall_post_epoll_pwait(res, epfd, events, maxevents, \ + timeout, sigmask, sigsetsize) \ + __sanitizer_syscall_post_impl_epoll_pwait( \ + res, (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \ + (long)(sigmask), (long)(sigsetsize)) +#define __sanitizer_syscall_pre_gethostname(name, len) \ + __sanitizer_syscall_pre_impl_gethostname((long)(name), (long)(len)) +#define __sanitizer_syscall_post_gethostname(res, name, len) \ + __sanitizer_syscall_post_impl_gethostname(res, (long)(name), (long)(len)) +#define __sanitizer_syscall_pre_sethostname(name, len) \ + __sanitizer_syscall_pre_impl_sethostname((long)(name), (long)(len)) +#define __sanitizer_syscall_post_sethostname(res, name, len) \ + __sanitizer_syscall_post_impl_sethostname(res, (long)(name), (long)(len)) +#define __sanitizer_syscall_pre_setdomainname(name, len) \ + __sanitizer_syscall_pre_impl_setdomainname((long)(name), 
(long)(len)) +#define __sanitizer_syscall_post_setdomainname(res, name, len) \ + __sanitizer_syscall_post_impl_setdomainname(res, (long)(name), (long)(len)) +#define __sanitizer_syscall_pre_newuname(name) \ + __sanitizer_syscall_pre_impl_newuname((long)(name)) +#define __sanitizer_syscall_post_newuname(res, name) \ + __sanitizer_syscall_post_impl_newuname(res, (long)(name)) +#define __sanitizer_syscall_pre_uname(arg0) \ + __sanitizer_syscall_pre_impl_uname((long)(arg0)) +#define __sanitizer_syscall_post_uname(res, arg0) \ + __sanitizer_syscall_post_impl_uname(res, (long)(arg0)) +#define __sanitizer_syscall_pre_olduname(arg0) \ + __sanitizer_syscall_pre_impl_olduname((long)(arg0)) +#define __sanitizer_syscall_post_olduname(res, arg0) \ + __sanitizer_syscall_post_impl_olduname(res, (long)(arg0)) +#define __sanitizer_syscall_pre_getrlimit(resource, rlim) \ + __sanitizer_syscall_pre_impl_getrlimit((long)(resource), (long)(rlim)) +#define __sanitizer_syscall_post_getrlimit(res, resource, rlim) \ + __sanitizer_syscall_post_impl_getrlimit(res, (long)(resource), (long)(rlim)) +#define __sanitizer_syscall_pre_old_getrlimit(resource, rlim) \ + __sanitizer_syscall_pre_impl_old_getrlimit((long)(resource), (long)(rlim)) +#define __sanitizer_syscall_post_old_getrlimit(res, resource, rlim) \ + __sanitizer_syscall_post_impl_old_getrlimit(res, (long)(resource), \ + (long)(rlim)) +#define __sanitizer_syscall_pre_setrlimit(resource, rlim) \ + __sanitizer_syscall_pre_impl_setrlimit((long)(resource), (long)(rlim)) +#define __sanitizer_syscall_post_setrlimit(res, resource, rlim) \ + __sanitizer_syscall_post_impl_setrlimit(res, (long)(resource), (long)(rlim)) +#define __sanitizer_syscall_pre_prlimit64(pid, resource, new_rlim, old_rlim) \ + __sanitizer_syscall_pre_impl_prlimit64((long)(pid), (long)(resource), \ + (long)(new_rlim), (long)(old_rlim)) +#define __sanitizer_syscall_post_prlimit64(res, pid, resource, new_rlim, \ + old_rlim) \ + __sanitizer_syscall_post_impl_prlimit64(res, 
(long)(pid), (long)(resource), \ + (long)(new_rlim), (long)(old_rlim)) +#define __sanitizer_syscall_pre_getrusage(who, ru) \ + __sanitizer_syscall_pre_impl_getrusage((long)(who), (long)(ru)) +#define __sanitizer_syscall_post_getrusage(res, who, ru) \ + __sanitizer_syscall_post_impl_getrusage(res, (long)(who), (long)(ru)) +#define __sanitizer_syscall_pre_umask(mask) \ + __sanitizer_syscall_pre_impl_umask((long)(mask)) +#define __sanitizer_syscall_post_umask(res, mask) \ + __sanitizer_syscall_post_impl_umask(res, (long)(mask)) +#define __sanitizer_syscall_pre_msgget(key, msgflg) \ + __sanitizer_syscall_pre_impl_msgget((long)(key), (long)(msgflg)) +#define __sanitizer_syscall_post_msgget(res, key, msgflg) \ + __sanitizer_syscall_post_impl_msgget(res, (long)(key), (long)(msgflg)) +#define __sanitizer_syscall_pre_msgsnd(msqid, msgp, msgsz, msgflg) \ + __sanitizer_syscall_pre_impl_msgsnd((long)(msqid), (long)(msgp), \ + (long)(msgsz), (long)(msgflg)) +#define __sanitizer_syscall_post_msgsnd(res, msqid, msgp, msgsz, msgflg) \ + __sanitizer_syscall_post_impl_msgsnd(res, (long)(msqid), (long)(msgp), \ + (long)(msgsz), (long)(msgflg)) +#define __sanitizer_syscall_pre_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg) \ + __sanitizer_syscall_pre_impl_msgrcv((long)(msqid), (long)(msgp), \ + (long)(msgsz), (long)(msgtyp), \ + (long)(msgflg)) +#define __sanitizer_syscall_post_msgrcv(res, msqid, msgp, msgsz, msgtyp, \ + msgflg) \ + __sanitizer_syscall_post_impl_msgrcv(res, (long)(msqid), (long)(msgp), \ + (long)(msgsz), (long)(msgtyp), \ + (long)(msgflg)) +#define __sanitizer_syscall_pre_msgctl(msqid, cmd, buf) \ + __sanitizer_syscall_pre_impl_msgctl((long)(msqid), (long)(cmd), (long)(buf)) +#define __sanitizer_syscall_post_msgctl(res, msqid, cmd, buf) \ + __sanitizer_syscall_post_impl_msgctl(res, (long)(msqid), (long)(cmd), \ + (long)(buf)) +#define __sanitizer_syscall_pre_semget(key, nsems, semflg) \ + __sanitizer_syscall_pre_impl_semget((long)(key), (long)(nsems), \ + (long)(semflg)) 
+#define __sanitizer_syscall_post_semget(res, key, nsems, semflg) \ + __sanitizer_syscall_post_impl_semget(res, (long)(key), (long)(nsems), \ + (long)(semflg)) +#define __sanitizer_syscall_pre_semop(semid, sops, nsops) \ + __sanitizer_syscall_pre_impl_semop((long)(semid), (long)(sops), (long)(nsops)) +#define __sanitizer_syscall_post_semop(res, semid, sops, nsops) \ + __sanitizer_syscall_post_impl_semop(res, (long)(semid), (long)(sops), \ + (long)(nsops)) +#define __sanitizer_syscall_pre_semctl(semid, semnum, cmd, arg) \ + __sanitizer_syscall_pre_impl_semctl((long)(semid), (long)(semnum), \ + (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_post_semctl(res, semid, semnum, cmd, arg) \ + __sanitizer_syscall_post_impl_semctl(res, (long)(semid), (long)(semnum), \ + (long)(cmd), (long)(arg)) +#define __sanitizer_syscall_pre_semtimedop(semid, sops, nsops, timeout) \ + __sanitizer_syscall_pre_impl_semtimedop((long)(semid), (long)(sops), \ + (long)(nsops), (long)(timeout)) +#define __sanitizer_syscall_post_semtimedop(res, semid, sops, nsops, timeout) \ + __sanitizer_syscall_post_impl_semtimedop(res, (long)(semid), (long)(sops), \ + (long)(nsops), (long)(timeout)) +#define __sanitizer_syscall_pre_shmat(shmid, shmaddr, shmflg) \ + __sanitizer_syscall_pre_impl_shmat((long)(shmid), (long)(shmaddr), \ + (long)(shmflg)) +#define __sanitizer_syscall_post_shmat(res, shmid, shmaddr, shmflg) \ + __sanitizer_syscall_post_impl_shmat(res, (long)(shmid), (long)(shmaddr), \ + (long)(shmflg)) +#define __sanitizer_syscall_pre_shmget(key, size, flag) \ + __sanitizer_syscall_pre_impl_shmget((long)(key), (long)(size), (long)(flag)) +#define __sanitizer_syscall_post_shmget(res, key, size, flag) \ + __sanitizer_syscall_post_impl_shmget(res, (long)(key), (long)(size), \ + (long)(flag)) +#define __sanitizer_syscall_pre_shmdt(shmaddr) \ + __sanitizer_syscall_pre_impl_shmdt((long)(shmaddr)) +#define __sanitizer_syscall_post_shmdt(res, shmaddr) \ + __sanitizer_syscall_post_impl_shmdt(res, 
(long)(shmaddr)) +#define __sanitizer_syscall_pre_shmctl(shmid, cmd, buf) \ + __sanitizer_syscall_pre_impl_shmctl((long)(shmid), (long)(cmd), (long)(buf)) +#define __sanitizer_syscall_post_shmctl(res, shmid, cmd, buf) \ + __sanitizer_syscall_post_impl_shmctl(res, (long)(shmid), (long)(cmd), \ + (long)(buf)) +#define __sanitizer_syscall_pre_ipc(call, first, second, third, ptr, fifth) \ + __sanitizer_syscall_pre_impl_ipc((long)(call), (long)(first), \ + (long)(second), (long)(third), (long)(ptr), \ + (long)(fifth)) +#define __sanitizer_syscall_post_ipc(res, call, first, second, third, ptr, \ + fifth) \ + __sanitizer_syscall_post_impl_ipc(res, (long)(call), (long)(first), \ + (long)(second), (long)(third), \ + (long)(ptr), (long)(fifth)) +#define __sanitizer_syscall_pre_mq_open(name, oflag, mode, attr) \ + __sanitizer_syscall_pre_impl_mq_open((long)(name), (long)(oflag), \ + (long)(mode), (long)(attr)) +#define __sanitizer_syscall_post_mq_open(res, name, oflag, mode, attr) \ + __sanitizer_syscall_post_impl_mq_open(res, (long)(name), (long)(oflag), \ + (long)(mode), (long)(attr)) +#define __sanitizer_syscall_pre_mq_unlink(name) \ + __sanitizer_syscall_pre_impl_mq_unlink((long)(name)) +#define __sanitizer_syscall_post_mq_unlink(res, name) \ + __sanitizer_syscall_post_impl_mq_unlink(res, (long)(name)) +#define __sanitizer_syscall_pre_mq_timedsend(mqdes, msg_ptr, msg_len, \ + msg_prio, abs_timeout) \ + __sanitizer_syscall_pre_impl_mq_timedsend((long)(mqdes), (long)(msg_ptr), \ + (long)(msg_len), (long)(msg_prio), \ + (long)(abs_timeout)) +#define __sanitizer_syscall_post_mq_timedsend(res, mqdes, msg_ptr, msg_len, \ + msg_prio, abs_timeout) \ + __sanitizer_syscall_post_impl_mq_timedsend( \ + res, (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \ + (long)(abs_timeout)) +#define __sanitizer_syscall_pre_mq_timedreceive(mqdes, msg_ptr, msg_len, \ + msg_prio, abs_timeout) \ + __sanitizer_syscall_pre_impl_mq_timedreceive( \ + (long)(mqdes), (long)(msg_ptr), 
(long)(msg_len), (long)(msg_prio), \ + (long)(abs_timeout)) +#define __sanitizer_syscall_post_mq_timedreceive(res, mqdes, msg_ptr, msg_len, \ + msg_prio, abs_timeout) \ + __sanitizer_syscall_post_impl_mq_timedreceive( \ + res, (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \ + (long)(abs_timeout)) +#define __sanitizer_syscall_pre_mq_notify(mqdes, notification) \ + __sanitizer_syscall_pre_impl_mq_notify((long)(mqdes), (long)(notification)) +#define __sanitizer_syscall_post_mq_notify(res, mqdes, notification) \ + __sanitizer_syscall_post_impl_mq_notify(res, (long)(mqdes), \ + (long)(notification)) +#define __sanitizer_syscall_pre_mq_getsetattr(mqdes, mqstat, omqstat) \ + __sanitizer_syscall_pre_impl_mq_getsetattr((long)(mqdes), (long)(mqstat), \ + (long)(omqstat)) +#define __sanitizer_syscall_post_mq_getsetattr(res, mqdes, mqstat, omqstat) \ + __sanitizer_syscall_post_impl_mq_getsetattr(res, (long)(mqdes), \ + (long)(mqstat), (long)(omqstat)) +#define __sanitizer_syscall_pre_pciconfig_iobase(which, bus, devfn) \ + __sanitizer_syscall_pre_impl_pciconfig_iobase((long)(which), (long)(bus), \ + (long)(devfn)) +#define __sanitizer_syscall_post_pciconfig_iobase(res, which, bus, devfn) \ + __sanitizer_syscall_post_impl_pciconfig_iobase(res, (long)(which), \ + (long)(bus), (long)(devfn)) +#define __sanitizer_syscall_pre_pciconfig_read(bus, dfn, off, len, buf) \ + __sanitizer_syscall_pre_impl_pciconfig_read( \ + (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf)) +#define __sanitizer_syscall_post_pciconfig_read(res, bus, dfn, off, len, buf) \ + __sanitizer_syscall_post_impl_pciconfig_read( \ + res, (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf)) +#define __sanitizer_syscall_pre_pciconfig_write(bus, dfn, off, len, buf) \ + __sanitizer_syscall_pre_impl_pciconfig_write( \ + (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf)) +#define __sanitizer_syscall_post_pciconfig_write(res, bus, dfn, off, len, buf) \ + 
__sanitizer_syscall_post_impl_pciconfig_write( \ + res, (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf)) +#define __sanitizer_syscall_pre_swapon(specialfile, swap_flags) \ + __sanitizer_syscall_pre_impl_swapon((long)(specialfile), (long)(swap_flags)) +#define __sanitizer_syscall_post_swapon(res, specialfile, swap_flags) \ + __sanitizer_syscall_post_impl_swapon(res, (long)(specialfile), \ + (long)(swap_flags)) +#define __sanitizer_syscall_pre_swapoff(specialfile) \ + __sanitizer_syscall_pre_impl_swapoff((long)(specialfile)) +#define __sanitizer_syscall_post_swapoff(res, specialfile) \ + __sanitizer_syscall_post_impl_swapoff(res, (long)(specialfile)) +#define __sanitizer_syscall_pre_sysctl(args) \ + __sanitizer_syscall_pre_impl_sysctl((long)(args)) +#define __sanitizer_syscall_post_sysctl(res, args) \ + __sanitizer_syscall_post_impl_sysctl(res, (long)(args)) +#define __sanitizer_syscall_pre_sysinfo(info) \ + __sanitizer_syscall_pre_impl_sysinfo((long)(info)) +#define __sanitizer_syscall_post_sysinfo(res, info) \ + __sanitizer_syscall_post_impl_sysinfo(res, (long)(info)) +#define __sanitizer_syscall_pre_sysfs(option, arg1, arg2) \ + __sanitizer_syscall_pre_impl_sysfs((long)(option), (long)(arg1), (long)(arg2)) +#define __sanitizer_syscall_post_sysfs(res, option, arg1, arg2) \ + __sanitizer_syscall_post_impl_sysfs(res, (long)(option), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_syslog(type, buf, len) \ + __sanitizer_syscall_pre_impl_syslog((long)(type), (long)(buf), (long)(len)) +#define __sanitizer_syscall_post_syslog(res, type, buf, len) \ + __sanitizer_syscall_post_impl_syslog(res, (long)(type), (long)(buf), \ + (long)(len)) +#define __sanitizer_syscall_pre_uselib(library) \ + __sanitizer_syscall_pre_impl_uselib((long)(library)) +#define __sanitizer_syscall_post_uselib(res, library) \ + __sanitizer_syscall_post_impl_uselib(res, (long)(library)) +#define __sanitizer_syscall_pre_ni_syscall() \ + 
__sanitizer_syscall_pre_impl_ni_syscall() +#define __sanitizer_syscall_post_ni_syscall(res) \ + __sanitizer_syscall_post_impl_ni_syscall(res) +#define __sanitizer_syscall_pre_ptrace(request, pid, addr, data) \ + __sanitizer_syscall_pre_impl_ptrace((long)(request), (long)(pid), \ + (long)(addr), (long)(data)) +#define __sanitizer_syscall_post_ptrace(res, request, pid, addr, data) \ + __sanitizer_syscall_post_impl_ptrace(res, (long)(request), (long)(pid), \ + (long)(addr), (long)(data)) +#define __sanitizer_syscall_pre_add_key(_type, _description, _payload, plen, \ + destringid) \ + __sanitizer_syscall_pre_impl_add_key((long)(_type), (long)(_description), \ + (long)(_payload), (long)(plen), \ + (long)(destringid)) +#define __sanitizer_syscall_post_add_key(res, _type, _description, _payload, \ + plen, destringid) \ + __sanitizer_syscall_post_impl_add_key( \ + res, (long)(_type), (long)(_description), (long)(_payload), \ + (long)(plen), (long)(destringid)) +#define __sanitizer_syscall_pre_request_key(_type, _description, \ + _callout_info, destringid) \ + __sanitizer_syscall_pre_impl_request_key( \ + (long)(_type), (long)(_description), (long)(_callout_info), \ + (long)(destringid)) +#define __sanitizer_syscall_post_request_key(res, _type, _description, \ + _callout_info, destringid) \ + __sanitizer_syscall_post_impl_request_key( \ + res, (long)(_type), (long)(_description), (long)(_callout_info), \ + (long)(destringid)) +#define __sanitizer_syscall_pre_keyctl(cmd, arg2, arg3, arg4, arg5) \ + __sanitizer_syscall_pre_impl_keyctl((long)(cmd), (long)(arg2), (long)(arg3), \ + (long)(arg4), (long)(arg5)) +#define __sanitizer_syscall_post_keyctl(res, cmd, arg2, arg3, arg4, arg5) \ + __sanitizer_syscall_post_impl_keyctl(res, (long)(cmd), (long)(arg2), \ + (long)(arg3), (long)(arg4), \ + (long)(arg5)) +#define __sanitizer_syscall_pre_ioprio_set(which, who, ioprio) \ + __sanitizer_syscall_pre_impl_ioprio_set((long)(which), (long)(who), \ + (long)(ioprio)) +#define 
__sanitizer_syscall_post_ioprio_set(res, which, who, ioprio) \ + __sanitizer_syscall_post_impl_ioprio_set(res, (long)(which), (long)(who), \ + (long)(ioprio)) +#define __sanitizer_syscall_pre_ioprio_get(which, who) \ + __sanitizer_syscall_pre_impl_ioprio_get((long)(which), (long)(who)) +#define __sanitizer_syscall_post_ioprio_get(res, which, who) \ + __sanitizer_syscall_post_impl_ioprio_get(res, (long)(which), (long)(who)) +#define __sanitizer_syscall_pre_set_mempolicy(mode, nmask, maxnode) \ + __sanitizer_syscall_pre_impl_set_mempolicy((long)(mode), (long)(nmask), \ + (long)(maxnode)) +#define __sanitizer_syscall_post_set_mempolicy(res, mode, nmask, maxnode) \ + __sanitizer_syscall_post_impl_set_mempolicy(res, (long)(mode), \ + (long)(nmask), (long)(maxnode)) +#define __sanitizer_syscall_pre_migrate_pages(pid, maxnode, from, to) \ + __sanitizer_syscall_pre_impl_migrate_pages((long)(pid), (long)(maxnode), \ + (long)(from), (long)(to)) +#define __sanitizer_syscall_post_migrate_pages(res, pid, maxnode, from, to) \ + __sanitizer_syscall_post_impl_migrate_pages( \ + res, (long)(pid), (long)(maxnode), (long)(from), (long)(to)) +#define __sanitizer_syscall_pre_move_pages(pid, nr_pages, pages, nodes, \ + status, flags) \ + __sanitizer_syscall_pre_impl_move_pages((long)(pid), (long)(nr_pages), \ + (long)(pages), (long)(nodes), \ + (long)(status), (long)(flags)) +#define __sanitizer_syscall_post_move_pages(res, pid, nr_pages, pages, nodes, \ + status, flags) \ + __sanitizer_syscall_post_impl_move_pages(res, (long)(pid), (long)(nr_pages), \ + (long)(pages), (long)(nodes), \ + (long)(status), (long)(flags)) +#define __sanitizer_syscall_pre_mbind(start, len, mode, nmask, maxnode, flags) \ + __sanitizer_syscall_pre_impl_mbind((long)(start), (long)(len), (long)(mode), \ + (long)(nmask), (long)(maxnode), \ + (long)(flags)) +#define __sanitizer_syscall_post_mbind(res, start, len, mode, nmask, maxnode, \ + flags) \ + __sanitizer_syscall_post_impl_mbind(res, (long)(start), 
(long)(len), \ + (long)(mode), (long)(nmask), \ + (long)(maxnode), (long)(flags)) +#define __sanitizer_syscall_pre_get_mempolicy(policy, nmask, maxnode, addr, \ + flags) \ + __sanitizer_syscall_pre_impl_get_mempolicy((long)(policy), (long)(nmask), \ + (long)(maxnode), (long)(addr), \ + (long)(flags)) +#define __sanitizer_syscall_post_get_mempolicy(res, policy, nmask, maxnode, \ + addr, flags) \ + __sanitizer_syscall_post_impl_get_mempolicy(res, (long)(policy), \ + (long)(nmask), (long)(maxnode), \ + (long)(addr), (long)(flags)) +#define __sanitizer_syscall_pre_inotify_init() \ + __sanitizer_syscall_pre_impl_inotify_init() +#define __sanitizer_syscall_post_inotify_init(res) \ + __sanitizer_syscall_post_impl_inotify_init(res) +#define __sanitizer_syscall_pre_inotify_init1(flags) \ + __sanitizer_syscall_pre_impl_inotify_init1((long)(flags)) +#define __sanitizer_syscall_post_inotify_init1(res, flags) \ + __sanitizer_syscall_post_impl_inotify_init1(res, (long)(flags)) +#define __sanitizer_syscall_pre_inotify_add_watch(fd, path, mask) \ + __sanitizer_syscall_pre_impl_inotify_add_watch((long)(fd), (long)(path), \ + (long)(mask)) +#define __sanitizer_syscall_post_inotify_add_watch(res, fd, path, mask) \ + __sanitizer_syscall_post_impl_inotify_add_watch(res, (long)(fd), \ + (long)(path), (long)(mask)) +#define __sanitizer_syscall_pre_inotify_rm_watch(fd, wd) \ + __sanitizer_syscall_pre_impl_inotify_rm_watch((long)(fd), (long)(wd)) +#define __sanitizer_syscall_post_inotify_rm_watch(res, fd, wd) \ + __sanitizer_syscall_post_impl_inotify_rm_watch(res, (long)(fd), (long)(wd)) +#define __sanitizer_syscall_pre_spu_run(fd, unpc, ustatus) \ + __sanitizer_syscall_pre_impl_spu_run((long)(fd), (long)(unpc), \ + (long)(ustatus)) +#define __sanitizer_syscall_post_spu_run(res, fd, unpc, ustatus) \ + __sanitizer_syscall_post_impl_spu_run(res, (long)(fd), (long)(unpc), \ + (long)(ustatus)) +#define __sanitizer_syscall_pre_spu_create(name, flags, mode, fd) \ + 
__sanitizer_syscall_pre_impl_spu_create((long)(name), (long)(flags), \ + (long)(mode), (long)(fd)) +#define __sanitizer_syscall_post_spu_create(res, name, flags, mode, fd) \ + __sanitizer_syscall_post_impl_spu_create(res, (long)(name), (long)(flags), \ + (long)(mode), (long)(fd)) +#define __sanitizer_syscall_pre_mknodat(dfd, filename, mode, dev) \ + __sanitizer_syscall_pre_impl_mknodat((long)(dfd), (long)(filename), \ + (long)(mode), (long)(dev)) +#define __sanitizer_syscall_post_mknodat(res, dfd, filename, mode, dev) \ + __sanitizer_syscall_post_impl_mknodat(res, (long)(dfd), (long)(filename), \ + (long)(mode), (long)(dev)) +#define __sanitizer_syscall_pre_mkdirat(dfd, pathname, mode) \ + __sanitizer_syscall_pre_impl_mkdirat((long)(dfd), (long)(pathname), \ + (long)(mode)) +#define __sanitizer_syscall_post_mkdirat(res, dfd, pathname, mode) \ + __sanitizer_syscall_post_impl_mkdirat(res, (long)(dfd), (long)(pathname), \ + (long)(mode)) +#define __sanitizer_syscall_pre_unlinkat(dfd, pathname, flag) \ + __sanitizer_syscall_pre_impl_unlinkat((long)(dfd), (long)(pathname), \ + (long)(flag)) +#define __sanitizer_syscall_post_unlinkat(res, dfd, pathname, flag) \ + __sanitizer_syscall_post_impl_unlinkat(res, (long)(dfd), (long)(pathname), \ + (long)(flag)) +#define __sanitizer_syscall_pre_symlinkat(oldname, newdfd, newname) \ + __sanitizer_syscall_pre_impl_symlinkat((long)(oldname), (long)(newdfd), \ + (long)(newname)) +#define __sanitizer_syscall_post_symlinkat(res, oldname, newdfd, newname) \ + __sanitizer_syscall_post_impl_symlinkat(res, (long)(oldname), \ + (long)(newdfd), (long)(newname)) +#define __sanitizer_syscall_pre_linkat(olddfd, oldname, newdfd, newname, \ + flags) \ + __sanitizer_syscall_pre_impl_linkat((long)(olddfd), (long)(oldname), \ + (long)(newdfd), (long)(newname), \ + (long)(flags)) +#define __sanitizer_syscall_post_linkat(res, olddfd, oldname, newdfd, newname, \ + flags) \ + __sanitizer_syscall_post_impl_linkat(res, (long)(olddfd), (long)(oldname), \ 
+ (long)(newdfd), (long)(newname), \ + (long)(flags)) +#define __sanitizer_syscall_pre_renameat(olddfd, oldname, newdfd, newname) \ + __sanitizer_syscall_pre_impl_renameat((long)(olddfd), (long)(oldname), \ + (long)(newdfd), (long)(newname)) +#define __sanitizer_syscall_post_renameat(res, olddfd, oldname, newdfd, \ + newname) \ + __sanitizer_syscall_post_impl_renameat(res, (long)(olddfd), (long)(oldname), \ + (long)(newdfd), (long)(newname)) +#define __sanitizer_syscall_pre_futimesat(dfd, filename, utimes) \ + __sanitizer_syscall_pre_impl_futimesat((long)(dfd), (long)(filename), \ + (long)(utimes)) +#define __sanitizer_syscall_post_futimesat(res, dfd, filename, utimes) \ + __sanitizer_syscall_post_impl_futimesat(res, (long)(dfd), (long)(filename), \ + (long)(utimes)) +#define __sanitizer_syscall_pre_faccessat(dfd, filename, mode) \ + __sanitizer_syscall_pre_impl_faccessat((long)(dfd), (long)(filename), \ + (long)(mode)) +#define __sanitizer_syscall_post_faccessat(res, dfd, filename, mode) \ + __sanitizer_syscall_post_impl_faccessat(res, (long)(dfd), (long)(filename), \ + (long)(mode)) +#define __sanitizer_syscall_pre_fchmodat(dfd, filename, mode) \ + __sanitizer_syscall_pre_impl_fchmodat((long)(dfd), (long)(filename), \ + (long)(mode)) +#define __sanitizer_syscall_post_fchmodat(res, dfd, filename, mode) \ + __sanitizer_syscall_post_impl_fchmodat(res, (long)(dfd), (long)(filename), \ + (long)(mode)) +#define __sanitizer_syscall_pre_fchownat(dfd, filename, user, group, flag) \ + __sanitizer_syscall_pre_impl_fchownat((long)(dfd), (long)(filename), \ + (long)(user), (long)(group), \ + (long)(flag)) +#define __sanitizer_syscall_post_fchownat(res, dfd, filename, user, group, \ + flag) \ + __sanitizer_syscall_post_impl_fchownat(res, (long)(dfd), (long)(filename), \ + (long)(user), (long)(group), \ + (long)(flag)) +#define __sanitizer_syscall_pre_openat(dfd, filename, flags, mode) \ + __sanitizer_syscall_pre_impl_openat((long)(dfd), (long)(filename), \ + (long)(flags), 
(long)(mode)) +#define __sanitizer_syscall_post_openat(res, dfd, filename, flags, mode) \ + __sanitizer_syscall_post_impl_openat(res, (long)(dfd), (long)(filename), \ + (long)(flags), (long)(mode)) +#define __sanitizer_syscall_pre_newfstatat(dfd, filename, statbuf, flag) \ + __sanitizer_syscall_pre_impl_newfstatat((long)(dfd), (long)(filename), \ + (long)(statbuf), (long)(flag)) +#define __sanitizer_syscall_post_newfstatat(res, dfd, filename, statbuf, flag) \ + __sanitizer_syscall_post_impl_newfstatat(res, (long)(dfd), (long)(filename), \ + (long)(statbuf), (long)(flag)) +#define __sanitizer_syscall_pre_fstatat64(dfd, filename, statbuf, flag) \ + __sanitizer_syscall_pre_impl_fstatat64((long)(dfd), (long)(filename), \ + (long)(statbuf), (long)(flag)) +#define __sanitizer_syscall_post_fstatat64(res, dfd, filename, statbuf, flag) \ + __sanitizer_syscall_post_impl_fstatat64(res, (long)(dfd), (long)(filename), \ + (long)(statbuf), (long)(flag)) +#define __sanitizer_syscall_pre_readlinkat(dfd, path, buf, bufsiz) \ + __sanitizer_syscall_pre_impl_readlinkat((long)(dfd), (long)(path), \ + (long)(buf), (long)(bufsiz)) +#define __sanitizer_syscall_post_readlinkat(res, dfd, path, buf, bufsiz) \ + __sanitizer_syscall_post_impl_readlinkat(res, (long)(dfd), (long)(path), \ + (long)(buf), (long)(bufsiz)) +#define __sanitizer_syscall_pre_utimensat(dfd, filename, utimes, flags) \ + __sanitizer_syscall_pre_impl_utimensat((long)(dfd), (long)(filename), \ + (long)(utimes), (long)(flags)) +#define __sanitizer_syscall_post_utimensat(res, dfd, filename, utimes, flags) \ + __sanitizer_syscall_post_impl_utimensat(res, (long)(dfd), (long)(filename), \ + (long)(utimes), (long)(flags)) +#define __sanitizer_syscall_pre_unshare(unshare_flags) \ + __sanitizer_syscall_pre_impl_unshare((long)(unshare_flags)) +#define __sanitizer_syscall_post_unshare(res, unshare_flags) \ + __sanitizer_syscall_post_impl_unshare(res, (long)(unshare_flags)) +#define __sanitizer_syscall_pre_splice(fd_in, off_in, 
fd_out, off_out, len, \ + flags) \ + __sanitizer_syscall_pre_impl_splice((long)(fd_in), (long)(off_in), \ + (long)(fd_out), (long)(off_out), \ + (long)(len), (long)(flags)) +#define __sanitizer_syscall_post_splice(res, fd_in, off_in, fd_out, off_out, \ + len, flags) \ + __sanitizer_syscall_post_impl_splice(res, (long)(fd_in), (long)(off_in), \ + (long)(fd_out), (long)(off_out), \ + (long)(len), (long)(flags)) +#define __sanitizer_syscall_pre_vmsplice(fd, iov, nr_segs, flags) \ + __sanitizer_syscall_pre_impl_vmsplice((long)(fd), (long)(iov), \ + (long)(nr_segs), (long)(flags)) +#define __sanitizer_syscall_post_vmsplice(res, fd, iov, nr_segs, flags) \ + __sanitizer_syscall_post_impl_vmsplice(res, (long)(fd), (long)(iov), \ + (long)(nr_segs), (long)(flags)) +#define __sanitizer_syscall_pre_tee(fdin, fdout, len, flags) \ + __sanitizer_syscall_pre_impl_tee((long)(fdin), (long)(fdout), (long)(len), \ + (long)(flags)) +#define __sanitizer_syscall_post_tee(res, fdin, fdout, len, flags) \ + __sanitizer_syscall_post_impl_tee(res, (long)(fdin), (long)(fdout), \ + (long)(len), (long)(flags)) +#define __sanitizer_syscall_pre_get_robust_list(pid, head_ptr, len_ptr) \ + __sanitizer_syscall_pre_impl_get_robust_list((long)(pid), (long)(head_ptr), \ + (long)(len_ptr)) +#define __sanitizer_syscall_post_get_robust_list(res, pid, head_ptr, len_ptr) \ + __sanitizer_syscall_post_impl_get_robust_list( \ + res, (long)(pid), (long)(head_ptr), (long)(len_ptr)) +#define __sanitizer_syscall_pre_set_robust_list(head, len) \ + __sanitizer_syscall_pre_impl_set_robust_list((long)(head), (long)(len)) +#define __sanitizer_syscall_post_set_robust_list(res, head, len) \ + __sanitizer_syscall_post_impl_set_robust_list(res, (long)(head), (long)(len)) +#define __sanitizer_syscall_pre_getcpu(cpu, node, cache) \ + __sanitizer_syscall_pre_impl_getcpu((long)(cpu), (long)(node), (long)(cache)) +#define __sanitizer_syscall_post_getcpu(res, cpu, node, cache) \ + __sanitizer_syscall_post_impl_getcpu(res, 
(long)(cpu), (long)(node), \ + (long)(cache)) +#define __sanitizer_syscall_pre_signalfd(ufd, user_mask, sizemask) \ + __sanitizer_syscall_pre_impl_signalfd((long)(ufd), (long)(user_mask), \ + (long)(sizemask)) +#define __sanitizer_syscall_post_signalfd(res, ufd, user_mask, sizemask) \ + __sanitizer_syscall_post_impl_signalfd(res, (long)(ufd), (long)(user_mask), \ + (long)(sizemask)) +#define __sanitizer_syscall_pre_signalfd4(ufd, user_mask, sizemask, flags) \ + __sanitizer_syscall_pre_impl_signalfd4((long)(ufd), (long)(user_mask), \ + (long)(sizemask), (long)(flags)) +#define __sanitizer_syscall_post_signalfd4(res, ufd, user_mask, sizemask, \ + flags) \ + __sanitizer_syscall_post_impl_signalfd4(res, (long)(ufd), (long)(user_mask), \ + (long)(sizemask), (long)(flags)) +#define __sanitizer_syscall_pre_timerfd_create(clockid, flags) \ + __sanitizer_syscall_pre_impl_timerfd_create((long)(clockid), (long)(flags)) +#define __sanitizer_syscall_post_timerfd_create(res, clockid, flags) \ + __sanitizer_syscall_post_impl_timerfd_create(res, (long)(clockid), \ + (long)(flags)) +#define __sanitizer_syscall_pre_timerfd_settime(ufd, flags, utmr, otmr) \ + __sanitizer_syscall_pre_impl_timerfd_settime((long)(ufd), (long)(flags), \ + (long)(utmr), (long)(otmr)) +#define __sanitizer_syscall_post_timerfd_settime(res, ufd, flags, utmr, otmr) \ + __sanitizer_syscall_post_impl_timerfd_settime( \ + res, (long)(ufd), (long)(flags), (long)(utmr), (long)(otmr)) +#define __sanitizer_syscall_pre_timerfd_gettime(ufd, otmr) \ + __sanitizer_syscall_pre_impl_timerfd_gettime((long)(ufd), (long)(otmr)) +#define __sanitizer_syscall_post_timerfd_gettime(res, ufd, otmr) \ + __sanitizer_syscall_post_impl_timerfd_gettime(res, (long)(ufd), (long)(otmr)) +#define __sanitizer_syscall_pre_eventfd(count) \ + __sanitizer_syscall_pre_impl_eventfd((long)(count)) +#define __sanitizer_syscall_post_eventfd(res, count) \ + __sanitizer_syscall_post_impl_eventfd(res, (long)(count)) +#define 
__sanitizer_syscall_pre_eventfd2(count, flags) \ + __sanitizer_syscall_pre_impl_eventfd2((long)(count), (long)(flags)) +#define __sanitizer_syscall_post_eventfd2(res, count, flags) \ + __sanitizer_syscall_post_impl_eventfd2(res, (long)(count), (long)(flags)) +#define __sanitizer_syscall_pre_old_readdir(arg0, arg1, arg2) \ + __sanitizer_syscall_pre_impl_old_readdir((long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_post_old_readdir(res, arg0, arg1, arg2) \ + __sanitizer_syscall_post_impl_old_readdir(res, (long)(arg0), (long)(arg1), \ + (long)(arg2)) +#define __sanitizer_syscall_pre_pselect6(arg0, arg1, arg2, arg3, arg4, arg5) \ + __sanitizer_syscall_pre_impl_pselect6((long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3), \ + (long)(arg4), (long)(arg5)) +#define __sanitizer_syscall_post_pselect6(res, arg0, arg1, arg2, arg3, arg4, \ + arg5) \ + __sanitizer_syscall_post_impl_pselect6(res, (long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3), \ + (long)(arg4), (long)(arg5)) +#define __sanitizer_syscall_pre_ppoll(arg0, arg1, arg2, arg3, arg4) \ + __sanitizer_syscall_pre_impl_ppoll((long)(arg0), (long)(arg1), (long)(arg2), \ + (long)(arg3), (long)(arg4)) +#define __sanitizer_syscall_post_ppoll(res, arg0, arg1, arg2, arg3, arg4) \ + __sanitizer_syscall_post_impl_ppoll(res, (long)(arg0), (long)(arg1), \ + (long)(arg2), (long)(arg3), \ + (long)(arg4)) +#define __sanitizer_syscall_pre_syncfs(fd) \ + __sanitizer_syscall_pre_impl_syncfs((long)(fd)) +#define __sanitizer_syscall_post_syncfs(res, fd) \ + __sanitizer_syscall_post_impl_syncfs(res, (long)(fd)) +#define __sanitizer_syscall_pre_perf_event_open(attr_uptr, pid, cpu, group_fd, \ + flags) \ + __sanitizer_syscall_pre_impl_perf_event_open((long)(attr_uptr), (long)(pid), \ + (long)(cpu), (long)(group_fd), \ + (long)(flags)) +#define __sanitizer_syscall_post_perf_event_open(res, attr_uptr, pid, cpu, \ + group_fd, flags) \ + __sanitizer_syscall_post_impl_perf_event_open( \ + res, 
(long)(attr_uptr), (long)(pid), (long)(cpu), (long)(group_fd), \ + (long)(flags)) +#define __sanitizer_syscall_pre_mmap_pgoff(addr, len, prot, flags, fd, pgoff) \ + __sanitizer_syscall_pre_impl_mmap_pgoff((long)(addr), (long)(len), \ + (long)(prot), (long)(flags), \ + (long)(fd), (long)(pgoff)) +#define __sanitizer_syscall_post_mmap_pgoff(res, addr, len, prot, flags, fd, \ + pgoff) \ + __sanitizer_syscall_post_impl_mmap_pgoff(res, (long)(addr), (long)(len), \ + (long)(prot), (long)(flags), \ + (long)(fd), (long)(pgoff)) +#define __sanitizer_syscall_pre_old_mmap(arg) \ + __sanitizer_syscall_pre_impl_old_mmap((long)(arg)) +#define __sanitizer_syscall_post_old_mmap(res, arg) \ + __sanitizer_syscall_post_impl_old_mmap(res, (long)(arg)) +#define __sanitizer_syscall_pre_name_to_handle_at(dfd, name, handle, mnt_id, \ + flag) \ + __sanitizer_syscall_pre_impl_name_to_handle_at( \ + (long)(dfd), (long)(name), (long)(handle), (long)(mnt_id), (long)(flag)) +#define __sanitizer_syscall_post_name_to_handle_at(res, dfd, name, handle, \ + mnt_id, flag) \ + __sanitizer_syscall_post_impl_name_to_handle_at( \ + res, (long)(dfd), (long)(name), (long)(handle), (long)(mnt_id), \ + (long)(flag)) +#define __sanitizer_syscall_pre_open_by_handle_at(mountdirfd, handle, flags) \ + __sanitizer_syscall_pre_impl_open_by_handle_at( \ + (long)(mountdirfd), (long)(handle), (long)(flags)) +#define __sanitizer_syscall_post_open_by_handle_at(res, mountdirfd, handle, \ + flags) \ + __sanitizer_syscall_post_impl_open_by_handle_at( \ + res, (long)(mountdirfd), (long)(handle), (long)(flags)) +#define __sanitizer_syscall_pre_setns(fd, nstype) \ + __sanitizer_syscall_pre_impl_setns((long)(fd), (long)(nstype)) +#define __sanitizer_syscall_post_setns(res, fd, nstype) \ + __sanitizer_syscall_post_impl_setns(res, (long)(fd), (long)(nstype)) +#define __sanitizer_syscall_pre_process_vm_readv(pid, lvec, liovcnt, rvec, \ + riovcnt, flags) \ + __sanitizer_syscall_pre_impl_process_vm_readv( \ + (long)(pid), 
(long)(lvec), (long)(liovcnt), (long)(rvec), \ + (long)(riovcnt), (long)(flags)) +#define __sanitizer_syscall_post_process_vm_readv(res, pid, lvec, liovcnt, \ + rvec, riovcnt, flags) \ + __sanitizer_syscall_post_impl_process_vm_readv( \ + res, (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \ + (long)(riovcnt), (long)(flags)) +#define __sanitizer_syscall_pre_process_vm_writev(pid, lvec, liovcnt, rvec, \ + riovcnt, flags) \ + __sanitizer_syscall_pre_impl_process_vm_writev( \ + (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \ + (long)(riovcnt), (long)(flags)) +#define __sanitizer_syscall_post_process_vm_writev(res, pid, lvec, liovcnt, \ + rvec, riovcnt, flags) \ + __sanitizer_syscall_post_impl_process_vm_writev( \ + res, (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \ + (long)(riovcnt), (long)(flags)) +#define __sanitizer_syscall_pre_fork() \ + __sanitizer_syscall_pre_impl_fork() +#define __sanitizer_syscall_post_fork(res) \ + __sanitizer_syscall_post_impl_fork(res) +#define __sanitizer_syscall_pre_vfork() \ + __sanitizer_syscall_pre_impl_vfork() +#define __sanitizer_syscall_post_vfork(res) \ + __sanitizer_syscall_post_impl_vfork(res) + +// And now a few syscalls we don't handle yet. +#define __sanitizer_syscall_pre_afs_syscall(...) +#define __sanitizer_syscall_pre_arch_prctl(...) +#define __sanitizer_syscall_pre_break(...) +#define __sanitizer_syscall_pre_chown32(...) +#define __sanitizer_syscall_pre_clone(...) +#define __sanitizer_syscall_pre_create_module(...) +#define __sanitizer_syscall_pre_epoll_ctl_old(...) +#define __sanitizer_syscall_pre_epoll_wait_old(...) +#define __sanitizer_syscall_pre_execve(...) +#define __sanitizer_syscall_pre_fadvise64(...) +#define __sanitizer_syscall_pre_fadvise64_64(...) +#define __sanitizer_syscall_pre_fallocate(...) +#define __sanitizer_syscall_pre_fanotify_init(...) +#define __sanitizer_syscall_pre_fanotify_mark(...) +#define __sanitizer_syscall_pre_fchown32(...) 
+// Pre hooks for syscalls that are not handled yet (list continues): each macro
+// accepts any arguments and expands to nothing.
+#define __sanitizer_syscall_pre_ftime(...)
+#define __sanitizer_syscall_pre_ftruncate64(...)
+#define __sanitizer_syscall_pre_futex(...)
+#define __sanitizer_syscall_pre_getegid32(...)
+#define __sanitizer_syscall_pre_geteuid32(...)
+#define __sanitizer_syscall_pre_getgid32(...)
+#define __sanitizer_syscall_pre_getgroups32(...)
+#define __sanitizer_syscall_pre_get_kernel_syms(...)
+#define __sanitizer_syscall_pre_getpmsg(...)
+#define __sanitizer_syscall_pre_getresgid32(...)
+#define __sanitizer_syscall_pre_getresuid32(...)
+#define __sanitizer_syscall_pre_get_thread_area(...)
+#define __sanitizer_syscall_pre_getuid32(...)
+#define __sanitizer_syscall_pre_gtty(...)
+#define __sanitizer_syscall_pre_idle(...)
+#define __sanitizer_syscall_pre_iopl(...)
+#define __sanitizer_syscall_pre_lchown32(...)
+#define __sanitizer_syscall_pre__llseek(...)
+#define __sanitizer_syscall_pre_lock(...)
+#define __sanitizer_syscall_pre_madvise1(...)
+#define __sanitizer_syscall_pre_mmap(...)
+#define __sanitizer_syscall_pre_mmap2(...)
+#define __sanitizer_syscall_pre_modify_ldt(...)
+#define __sanitizer_syscall_pre_mpx(...)
+#define __sanitizer_syscall_pre__newselect(...)
+#define __sanitizer_syscall_pre_nfsservctl(...)
+#define __sanitizer_syscall_pre_oldfstat(...)
+#define __sanitizer_syscall_pre_oldlstat(...)
+#define __sanitizer_syscall_pre_oldolduname(...)
+#define __sanitizer_syscall_pre_oldstat(...)
+#define __sanitizer_syscall_pre_prctl(...)
+#define __sanitizer_syscall_pre_prof(...)
+#define __sanitizer_syscall_pre_profil(...)
+#define __sanitizer_syscall_pre_putpmsg(...)
+#define __sanitizer_syscall_pre_query_module(...)
+#define __sanitizer_syscall_pre_readahead(...)
+#define __sanitizer_syscall_pre_readdir(...)
+#define __sanitizer_syscall_pre_rt_sigaction(...)
+#define __sanitizer_syscall_pre_rt_sigreturn(...)
+#define __sanitizer_syscall_pre_rt_sigsuspend(...)
+#define __sanitizer_syscall_pre_security(...)
+#define __sanitizer_syscall_pre_setfsgid32(...)
+#define __sanitizer_syscall_pre_setfsuid32(...)
+#define __sanitizer_syscall_pre_setgid32(...)
+#define __sanitizer_syscall_pre_setgroups32(...)
+#define __sanitizer_syscall_pre_setregid32(...)
+#define __sanitizer_syscall_pre_setresgid32(...)
+#define __sanitizer_syscall_pre_setresuid32(...)
+#define __sanitizer_syscall_pre_setreuid32(...)
+#define __sanitizer_syscall_pre_set_thread_area(...)
+#define __sanitizer_syscall_pre_setuid32(...)
+#define __sanitizer_syscall_pre_sigaction(...)
+#define __sanitizer_syscall_pre_sigaltstack(...)
+#define __sanitizer_syscall_pre_sigreturn(...)
+#define __sanitizer_syscall_pre_sigsuspend(...)
+#define __sanitizer_syscall_pre_stty(...)
+#define __sanitizer_syscall_pre_sync_file_range(...)
+#define __sanitizer_syscall_pre__sysctl(...)
+#define __sanitizer_syscall_pre_truncate64(...)
+#define __sanitizer_syscall_pre_tuxcall(...)
+#define __sanitizer_syscall_pre_ugetrlimit(...)
+#define __sanitizer_syscall_pre_ulimit(...)
+#define __sanitizer_syscall_pre_umount2(...)
+#define __sanitizer_syscall_pre_vm86(...)
+#define __sanitizer_syscall_pre_vm86old(...)
+#define __sanitizer_syscall_pre_vserver(...)
+
+// Post hooks for syscalls that are not handled yet: `res` and every further
+// argument are discarded, and the macro expands to nothing.
+#define __sanitizer_syscall_post_afs_syscall(res, ...)
+#define __sanitizer_syscall_post_arch_prctl(res, ...)
+#define __sanitizer_syscall_post_break(res, ...)
+#define __sanitizer_syscall_post_chown32(res, ...)
+#define __sanitizer_syscall_post_clone(res, ...)
+#define __sanitizer_syscall_post_create_module(res, ...)
+#define __sanitizer_syscall_post_epoll_ctl_old(res, ...)
+#define __sanitizer_syscall_post_epoll_wait_old(res, ...)
+#define __sanitizer_syscall_post_execve(res, ...)
+#define __sanitizer_syscall_post_fadvise64(res, ...)
+#define __sanitizer_syscall_post_fadvise64_64(res, ...)
+#define __sanitizer_syscall_post_fallocate(res, ...)
+#define __sanitizer_syscall_post_fanotify_init(res, ...)
+#define __sanitizer_syscall_post_fanotify_mark(res, ...)
+#define __sanitizer_syscall_post_fchown32(res, ...)
+#define __sanitizer_syscall_post_ftime(res, ...)
+#define __sanitizer_syscall_post_ftruncate64(res, ...)
+#define __sanitizer_syscall_post_futex(res, ...)
+#define __sanitizer_syscall_post_getegid32(res, ...)
+#define __sanitizer_syscall_post_geteuid32(res, ...)
+#define __sanitizer_syscall_post_getgid32(res, ...)
+#define __sanitizer_syscall_post_getgroups32(res, ...)
+#define __sanitizer_syscall_post_get_kernel_syms(res, ...)
+#define __sanitizer_syscall_post_getpmsg(res, ...)
+#define __sanitizer_syscall_post_getresgid32(res, ...)
+#define __sanitizer_syscall_post_getresuid32(res, ...)
+#define __sanitizer_syscall_post_get_thread_area(res, ...)
+#define __sanitizer_syscall_post_getuid32(res, ...)
+#define __sanitizer_syscall_post_gtty(res, ...)
+#define __sanitizer_syscall_post_idle(res, ...)
+#define __sanitizer_syscall_post_iopl(res, ...)
+#define __sanitizer_syscall_post_lchown32(res, ...)
+#define __sanitizer_syscall_post__llseek(res, ...)
+#define __sanitizer_syscall_post_lock(res, ...)
+#define __sanitizer_syscall_post_madvise1(res, ...)
+#define __sanitizer_syscall_post_mmap2(res, ...)
+#define __sanitizer_syscall_post_mmap(res, ...)
+#define __sanitizer_syscall_post_modify_ldt(res, ...)
+#define __sanitizer_syscall_post_mpx(res, ...)
+#define __sanitizer_syscall_post__newselect(res, ...)
+#define __sanitizer_syscall_post_nfsservctl(res, ...)
+#define __sanitizer_syscall_post_oldfstat(res, ...)
+#define __sanitizer_syscall_post_oldlstat(res, ...)
+#define __sanitizer_syscall_post_oldolduname(res, ...)
+#define __sanitizer_syscall_post_oldstat(res, ...)
+#define __sanitizer_syscall_post_prctl(res, ...)
+#define __sanitizer_syscall_post_profil(res, ...)
+#define __sanitizer_syscall_post_prof(res, ...)
+#define __sanitizer_syscall_post_putpmsg(res, ...)
+#define __sanitizer_syscall_post_query_module(res, ...)
+#define __sanitizer_syscall_post_readahead(res, ...)
+#define __sanitizer_syscall_post_readdir(res, ...)
+#define __sanitizer_syscall_post_rt_sigaction(res, ...)
+#define __sanitizer_syscall_post_rt_sigreturn(res, ...)
+#define __sanitizer_syscall_post_rt_sigsuspend(res, ...)
+#define __sanitizer_syscall_post_security(res, ...)
+#define __sanitizer_syscall_post_setfsgid32(res, ...)
+#define __sanitizer_syscall_post_setfsuid32(res, ...)
+#define __sanitizer_syscall_post_setgid32(res, ...)
+#define __sanitizer_syscall_post_setgroups32(res, ...)
+#define __sanitizer_syscall_post_setregid32(res, ...)
+#define __sanitizer_syscall_post_setresgid32(res, ...)
+#define __sanitizer_syscall_post_setresuid32(res, ...)
+#define __sanitizer_syscall_post_setreuid32(res, ...)
+#define __sanitizer_syscall_post_set_thread_area(res, ...)
+#define __sanitizer_syscall_post_setuid32(res, ...)
+#define __sanitizer_syscall_post_sigaction(res, ...)
+#define __sanitizer_syscall_post_sigaltstack(res, ...)
+#define __sanitizer_syscall_post_sigreturn(res, ...)
+#define __sanitizer_syscall_post_sigsuspend(res, ...)
+#define __sanitizer_syscall_post_stty(res, ...)
+#define __sanitizer_syscall_post_sync_file_range(res, ...)
+#define __sanitizer_syscall_post__sysctl(res, ...)
+#define __sanitizer_syscall_post_truncate64(res, ...)
+#define __sanitizer_syscall_post_tuxcall(res, ...)
+#define __sanitizer_syscall_post_ugetrlimit(res, ...)
+#define __sanitizer_syscall_post_ulimit(res, ...)
+#define __sanitizer_syscall_post_umount2(res, ...)
+#define __sanitizer_syscall_post_vm86old(res, ...)
+#define __sanitizer_syscall_post_vm86(res, ...)
+#define __sanitizer_syscall_post_vserver(res, ...)
+
+// When compiled as C++, give the declarations that follow C linkage.
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Private declarations. Do not call directly from user code. Use macros above.
+void __sanitizer_syscall_pre_impl_time(long tloc); +void __sanitizer_syscall_post_impl_time(long res, long tloc); +void __sanitizer_syscall_pre_impl_stime(long tptr); +void __sanitizer_syscall_post_impl_stime(long res, long tptr); +void __sanitizer_syscall_pre_impl_gettimeofday(long tv, long tz); +void __sanitizer_syscall_post_impl_gettimeofday(long res, long tv, long tz); +void __sanitizer_syscall_pre_impl_settimeofday(long tv, long tz); +void __sanitizer_syscall_post_impl_settimeofday(long res, long tv, long tz); +void __sanitizer_syscall_pre_impl_adjtimex(long txc_p); +void __sanitizer_syscall_post_impl_adjtimex(long res, long txc_p); +void __sanitizer_syscall_pre_impl_times(long tbuf); +void __sanitizer_syscall_post_impl_times(long res, long tbuf); +void __sanitizer_syscall_pre_impl_gettid(); +void __sanitizer_syscall_post_impl_gettid(long res); +void __sanitizer_syscall_pre_impl_nanosleep(long rqtp, long rmtp); +void __sanitizer_syscall_post_impl_nanosleep(long res, long rqtp, long rmtp); +void __sanitizer_syscall_pre_impl_alarm(long seconds); +void __sanitizer_syscall_post_impl_alarm(long res, long seconds); +void __sanitizer_syscall_pre_impl_getpid(); +void __sanitizer_syscall_post_impl_getpid(long res); +void __sanitizer_syscall_pre_impl_getppid(); +void __sanitizer_syscall_post_impl_getppid(long res); +void __sanitizer_syscall_pre_impl_getuid(); +void __sanitizer_syscall_post_impl_getuid(long res); +void __sanitizer_syscall_pre_impl_geteuid(); +void __sanitizer_syscall_post_impl_geteuid(long res); +void __sanitizer_syscall_pre_impl_getgid(); +void __sanitizer_syscall_post_impl_getgid(long res); +void __sanitizer_syscall_pre_impl_getegid(); +void __sanitizer_syscall_post_impl_getegid(long res); +void __sanitizer_syscall_pre_impl_getresuid(long ruid, long euid, long suid); +void __sanitizer_syscall_post_impl_getresuid(long res, long ruid, long euid, + long suid); +void __sanitizer_syscall_pre_impl_getresgid(long rgid, long egid, long sgid); +void 
__sanitizer_syscall_post_impl_getresgid(long res, long rgid, long egid, + long sgid); +void __sanitizer_syscall_pre_impl_getpgid(long pid); +void __sanitizer_syscall_post_impl_getpgid(long res, long pid); +void __sanitizer_syscall_pre_impl_getpgrp(); +void __sanitizer_syscall_post_impl_getpgrp(long res); +void __sanitizer_syscall_pre_impl_getsid(long pid); +void __sanitizer_syscall_post_impl_getsid(long res, long pid); +void __sanitizer_syscall_pre_impl_getgroups(long gidsetsize, long grouplist); +void __sanitizer_syscall_post_impl_getgroups(long res, long gidsetsize, + long grouplist); +void __sanitizer_syscall_pre_impl_setregid(long rgid, long egid); +void __sanitizer_syscall_post_impl_setregid(long res, long rgid, long egid); +void __sanitizer_syscall_pre_impl_setgid(long gid); +void __sanitizer_syscall_post_impl_setgid(long res, long gid); +void __sanitizer_syscall_pre_impl_setreuid(long ruid, long euid); +void __sanitizer_syscall_post_impl_setreuid(long res, long ruid, long euid); +void __sanitizer_syscall_pre_impl_setuid(long uid); +void __sanitizer_syscall_post_impl_setuid(long res, long uid); +void __sanitizer_syscall_pre_impl_setresuid(long ruid, long euid, long suid); +void __sanitizer_syscall_post_impl_setresuid(long res, long ruid, long euid, + long suid); +void __sanitizer_syscall_pre_impl_setresgid(long rgid, long egid, long sgid); +void __sanitizer_syscall_post_impl_setresgid(long res, long rgid, long egid, + long sgid); +void __sanitizer_syscall_pre_impl_setfsuid(long uid); +void __sanitizer_syscall_post_impl_setfsuid(long res, long uid); +void __sanitizer_syscall_pre_impl_setfsgid(long gid); +void __sanitizer_syscall_post_impl_setfsgid(long res, long gid); +void __sanitizer_syscall_pre_impl_setpgid(long pid, long pgid); +void __sanitizer_syscall_post_impl_setpgid(long res, long pid, long pgid); +void __sanitizer_syscall_pre_impl_setsid(); +void __sanitizer_syscall_post_impl_setsid(long res); +void __sanitizer_syscall_pre_impl_setgroups(long 
gidsetsize, long grouplist); +void __sanitizer_syscall_post_impl_setgroups(long res, long gidsetsize, + long grouplist); +void __sanitizer_syscall_pre_impl_acct(long name); +void __sanitizer_syscall_post_impl_acct(long res, long name); +void __sanitizer_syscall_pre_impl_capget(long header, long dataptr); +void __sanitizer_syscall_post_impl_capget(long res, long header, long dataptr); +void __sanitizer_syscall_pre_impl_capset(long header, long data); +void __sanitizer_syscall_post_impl_capset(long res, long header, long data); +void __sanitizer_syscall_pre_impl_personality(long personality); +void __sanitizer_syscall_post_impl_personality(long res, long personality); +void __sanitizer_syscall_pre_impl_sigpending(long set); +void __sanitizer_syscall_post_impl_sigpending(long res, long set); +void __sanitizer_syscall_pre_impl_sigprocmask(long how, long set, long oset); +void __sanitizer_syscall_post_impl_sigprocmask(long res, long how, long set, + long oset); +void __sanitizer_syscall_pre_impl_getitimer(long which, long value); +void __sanitizer_syscall_post_impl_getitimer(long res, long which, long value); +void __sanitizer_syscall_pre_impl_setitimer(long which, long value, + long ovalue); +void __sanitizer_syscall_post_impl_setitimer(long res, long which, long value, + long ovalue); +void __sanitizer_syscall_pre_impl_timer_create(long which_clock, + long timer_event_spec, + long created_timer_id); +void __sanitizer_syscall_post_impl_timer_create(long res, long which_clock, + long timer_event_spec, + long created_timer_id); +void __sanitizer_syscall_pre_impl_timer_gettime(long timer_id, long setting); +void __sanitizer_syscall_post_impl_timer_gettime(long res, long timer_id, + long setting); +void __sanitizer_syscall_pre_impl_timer_getoverrun(long timer_id); +void __sanitizer_syscall_post_impl_timer_getoverrun(long res, long timer_id); +void __sanitizer_syscall_pre_impl_timer_settime(long timer_id, long flags, + long new_setting, + long old_setting); +void 
__sanitizer_syscall_post_impl_timer_settime(long res, long timer_id, + long flags, long new_setting, + long old_setting); +void __sanitizer_syscall_pre_impl_timer_delete(long timer_id); +void __sanitizer_syscall_post_impl_timer_delete(long res, long timer_id); +void __sanitizer_syscall_pre_impl_clock_settime(long which_clock, long tp); +void __sanitizer_syscall_post_impl_clock_settime(long res, long which_clock, + long tp); +void __sanitizer_syscall_pre_impl_clock_gettime(long which_clock, long tp); +void __sanitizer_syscall_post_impl_clock_gettime(long res, long which_clock, + long tp); +void __sanitizer_syscall_pre_impl_clock_adjtime(long which_clock, long tx); +void __sanitizer_syscall_post_impl_clock_adjtime(long res, long which_clock, + long tx); +void __sanitizer_syscall_pre_impl_clock_getres(long which_clock, long tp); +void __sanitizer_syscall_post_impl_clock_getres(long res, long which_clock, + long tp); +void __sanitizer_syscall_pre_impl_clock_nanosleep(long which_clock, long flags, + long rqtp, long rmtp); +void __sanitizer_syscall_post_impl_clock_nanosleep(long res, long which_clock, + long flags, long rqtp, + long rmtp); +void __sanitizer_syscall_pre_impl_nice(long increment); +void __sanitizer_syscall_post_impl_nice(long res, long increment); +void __sanitizer_syscall_pre_impl_sched_setscheduler(long pid, long policy, + long param); +void __sanitizer_syscall_post_impl_sched_setscheduler(long res, long pid, + long policy, long param); +void __sanitizer_syscall_pre_impl_sched_setparam(long pid, long param); +void __sanitizer_syscall_post_impl_sched_setparam(long res, long pid, + long param); +void __sanitizer_syscall_pre_impl_sched_getscheduler(long pid); +void __sanitizer_syscall_post_impl_sched_getscheduler(long res, long pid); +void __sanitizer_syscall_pre_impl_sched_getparam(long pid, long param); +void __sanitizer_syscall_post_impl_sched_getparam(long res, long pid, + long param); +void __sanitizer_syscall_pre_impl_sched_setaffinity(long pid, long 
len, + long user_mask_ptr); +void __sanitizer_syscall_post_impl_sched_setaffinity(long res, long pid, + long len, + long user_mask_ptr); +void __sanitizer_syscall_pre_impl_sched_getaffinity(long pid, long len, + long user_mask_ptr); +void __sanitizer_syscall_post_impl_sched_getaffinity(long res, long pid, + long len, + long user_mask_ptr); +void __sanitizer_syscall_pre_impl_sched_yield(); +void __sanitizer_syscall_post_impl_sched_yield(long res); +void __sanitizer_syscall_pre_impl_sched_get_priority_max(long policy); +void __sanitizer_syscall_post_impl_sched_get_priority_max(long res, + long policy); +void __sanitizer_syscall_pre_impl_sched_get_priority_min(long policy); +void __sanitizer_syscall_post_impl_sched_get_priority_min(long res, + long policy); +void __sanitizer_syscall_pre_impl_sched_rr_get_interval(long pid, + long interval); +void __sanitizer_syscall_post_impl_sched_rr_get_interval(long res, long pid, + long interval); +void __sanitizer_syscall_pre_impl_setpriority(long which, long who, + long niceval); +void __sanitizer_syscall_post_impl_setpriority(long res, long which, long who, + long niceval); +void __sanitizer_syscall_pre_impl_getpriority(long which, long who); +void __sanitizer_syscall_post_impl_getpriority(long res, long which, long who); +void __sanitizer_syscall_pre_impl_shutdown(long arg0, long arg1); +void __sanitizer_syscall_post_impl_shutdown(long res, long arg0, long arg1); +void __sanitizer_syscall_pre_impl_reboot(long magic1, long magic2, long cmd, + long arg); +void __sanitizer_syscall_post_impl_reboot(long res, long magic1, long magic2, + long cmd, long arg); +void __sanitizer_syscall_pre_impl_restart_syscall(); +void __sanitizer_syscall_post_impl_restart_syscall(long res); +void __sanitizer_syscall_pre_impl_kexec_load(long entry, long nr_segments, + long segments, long flags); +void __sanitizer_syscall_post_impl_kexec_load(long res, long entry, + long nr_segments, long segments, + long flags); +void 
__sanitizer_syscall_pre_impl_exit(long error_code); +void __sanitizer_syscall_post_impl_exit(long res, long error_code); +void __sanitizer_syscall_pre_impl_exit_group(long error_code); +void __sanitizer_syscall_post_impl_exit_group(long res, long error_code); +void __sanitizer_syscall_pre_impl_wait4(long pid, long stat_addr, long options, + long ru); +void __sanitizer_syscall_post_impl_wait4(long res, long pid, long stat_addr, + long options, long ru); +void __sanitizer_syscall_pre_impl_waitid(long which, long pid, long infop, + long options, long ru); +void __sanitizer_syscall_post_impl_waitid(long res, long which, long pid, + long infop, long options, long ru); +void __sanitizer_syscall_pre_impl_waitpid(long pid, long stat_addr, + long options); +void __sanitizer_syscall_post_impl_waitpid(long res, long pid, long stat_addr, + long options); +void __sanitizer_syscall_pre_impl_set_tid_address(long tidptr); +void __sanitizer_syscall_post_impl_set_tid_address(long res, long tidptr); +void __sanitizer_syscall_pre_impl_init_module(long umod, long len, long uargs); +void __sanitizer_syscall_post_impl_init_module(long res, long umod, long len, + long uargs); +void __sanitizer_syscall_pre_impl_delete_module(long name_user, long flags); +void __sanitizer_syscall_post_impl_delete_module(long res, long name_user, + long flags); +void __sanitizer_syscall_pre_impl_rt_sigprocmask(long how, long set, long oset, + long sigsetsize); +void __sanitizer_syscall_post_impl_rt_sigprocmask(long res, long how, long set, + long oset, long sigsetsize); +void __sanitizer_syscall_pre_impl_rt_sigpending(long set, long sigsetsize); +void __sanitizer_syscall_post_impl_rt_sigpending(long res, long set, + long sigsetsize); +void __sanitizer_syscall_pre_impl_rt_sigtimedwait(long uthese, long uinfo, + long uts, long sigsetsize); +void __sanitizer_syscall_post_impl_rt_sigtimedwait(long res, long uthese, + long uinfo, long uts, + long sigsetsize); +void 
__sanitizer_syscall_pre_impl_rt_tgsigqueueinfo(long tgid, long pid, + long sig, long uinfo); +void __sanitizer_syscall_post_impl_rt_tgsigqueueinfo(long res, long tgid, + long pid, long sig, + long uinfo); +void __sanitizer_syscall_pre_impl_kill(long pid, long sig); +void __sanitizer_syscall_post_impl_kill(long res, long pid, long sig); +void __sanitizer_syscall_pre_impl_tgkill(long tgid, long pid, long sig); +void __sanitizer_syscall_post_impl_tgkill(long res, long tgid, long pid, + long sig); +void __sanitizer_syscall_pre_impl_tkill(long pid, long sig); +void __sanitizer_syscall_post_impl_tkill(long res, long pid, long sig); +void __sanitizer_syscall_pre_impl_rt_sigqueueinfo(long pid, long sig, + long uinfo); +void __sanitizer_syscall_post_impl_rt_sigqueueinfo(long res, long pid, long sig, + long uinfo); +void __sanitizer_syscall_pre_impl_sgetmask(); +void __sanitizer_syscall_post_impl_sgetmask(long res); +void __sanitizer_syscall_pre_impl_ssetmask(long newmask); +void __sanitizer_syscall_post_impl_ssetmask(long res, long newmask); +void __sanitizer_syscall_pre_impl_signal(long sig, long handler); +void __sanitizer_syscall_post_impl_signal(long res, long sig, long handler); +void __sanitizer_syscall_pre_impl_pause(); +void __sanitizer_syscall_post_impl_pause(long res); +void __sanitizer_syscall_pre_impl_sync(); +void __sanitizer_syscall_post_impl_sync(long res); +void __sanitizer_syscall_pre_impl_fsync(long fd); +void __sanitizer_syscall_post_impl_fsync(long res, long fd); +void __sanitizer_syscall_pre_impl_fdatasync(long fd); +void __sanitizer_syscall_post_impl_fdatasync(long res, long fd); +void __sanitizer_syscall_pre_impl_bdflush(long func, long data); +void __sanitizer_syscall_post_impl_bdflush(long res, long func, long data); +void __sanitizer_syscall_pre_impl_mount(long dev_name, long dir_name, long type, + long flags, long data); +void __sanitizer_syscall_post_impl_mount(long res, long dev_name, long dir_name, + long type, long flags, long data); +void 
__sanitizer_syscall_pre_impl_umount(long name, long flags); +void __sanitizer_syscall_post_impl_umount(long res, long name, long flags); +void __sanitizer_syscall_pre_impl_oldumount(long name); +void __sanitizer_syscall_post_impl_oldumount(long res, long name); +void __sanitizer_syscall_pre_impl_truncate(long path, long length); +void __sanitizer_syscall_post_impl_truncate(long res, long path, long length); +void __sanitizer_syscall_pre_impl_ftruncate(long fd, long length); +void __sanitizer_syscall_post_impl_ftruncate(long res, long fd, long length); +void __sanitizer_syscall_pre_impl_stat(long filename, long statbuf); +void __sanitizer_syscall_post_impl_stat(long res, long filename, long statbuf); +void __sanitizer_syscall_pre_impl_statfs(long path, long buf); +void __sanitizer_syscall_post_impl_statfs(long res, long path, long buf); +void __sanitizer_syscall_pre_impl_statfs64(long path, long sz, long buf); +void __sanitizer_syscall_post_impl_statfs64(long res, long path, long sz, + long buf); +void __sanitizer_syscall_pre_impl_fstatfs(long fd, long buf); +void __sanitizer_syscall_post_impl_fstatfs(long res, long fd, long buf); +void __sanitizer_syscall_pre_impl_fstatfs64(long fd, long sz, long buf); +void __sanitizer_syscall_post_impl_fstatfs64(long res, long fd, long sz, + long buf); +void __sanitizer_syscall_pre_impl_lstat(long filename, long statbuf); +void __sanitizer_syscall_post_impl_lstat(long res, long filename, long statbuf); +void __sanitizer_syscall_pre_impl_fstat(long fd, long statbuf); +void __sanitizer_syscall_post_impl_fstat(long res, long fd, long statbuf); +void __sanitizer_syscall_pre_impl_newstat(long filename, long statbuf); +void __sanitizer_syscall_post_impl_newstat(long res, long filename, + long statbuf); +void __sanitizer_syscall_pre_impl_newlstat(long filename, long statbuf); +void __sanitizer_syscall_post_impl_newlstat(long res, long filename, + long statbuf); +void __sanitizer_syscall_pre_impl_newfstat(long fd, long statbuf); +void 
__sanitizer_syscall_post_impl_newfstat(long res, long fd, long statbuf); +void __sanitizer_syscall_pre_impl_ustat(long dev, long ubuf); +void __sanitizer_syscall_post_impl_ustat(long res, long dev, long ubuf); +void __sanitizer_syscall_pre_impl_stat64(long filename, long statbuf); +void __sanitizer_syscall_post_impl_stat64(long res, long filename, + long statbuf); +void __sanitizer_syscall_pre_impl_fstat64(long fd, long statbuf); +void __sanitizer_syscall_post_impl_fstat64(long res, long fd, long statbuf); +void __sanitizer_syscall_pre_impl_lstat64(long filename, long statbuf); +void __sanitizer_syscall_post_impl_lstat64(long res, long filename, + long statbuf); +void __sanitizer_syscall_pre_impl_setxattr(long path, long name, long value, + long size, long flags); +void __sanitizer_syscall_post_impl_setxattr(long res, long path, long name, + long value, long size, long flags); +void __sanitizer_syscall_pre_impl_lsetxattr(long path, long name, long value, + long size, long flags); +void __sanitizer_syscall_post_impl_lsetxattr(long res, long path, long name, + long value, long size, long flags); +void __sanitizer_syscall_pre_impl_fsetxattr(long fd, long name, long value, + long size, long flags); +void __sanitizer_syscall_post_impl_fsetxattr(long res, long fd, long name, + long value, long size, long flags); +void __sanitizer_syscall_pre_impl_getxattr(long path, long name, long value, + long size); +void __sanitizer_syscall_post_impl_getxattr(long res, long path, long name, + long value, long size); +void __sanitizer_syscall_pre_impl_lgetxattr(long path, long name, long value, + long size); +void __sanitizer_syscall_post_impl_lgetxattr(long res, long path, long name, + long value, long size); +void __sanitizer_syscall_pre_impl_fgetxattr(long fd, long name, long value, + long size); +void __sanitizer_syscall_post_impl_fgetxattr(long res, long fd, long name, + long value, long size); +void __sanitizer_syscall_pre_impl_listxattr(long path, long list, long size); +void 
__sanitizer_syscall_post_impl_listxattr(long res, long path, long list, + long size); +void __sanitizer_syscall_pre_impl_llistxattr(long path, long list, long size); +void __sanitizer_syscall_post_impl_llistxattr(long res, long path, long list, + long size); +void __sanitizer_syscall_pre_impl_flistxattr(long fd, long list, long size); +void __sanitizer_syscall_post_impl_flistxattr(long res, long fd, long list, + long size); +void __sanitizer_syscall_pre_impl_removexattr(long path, long name); +void __sanitizer_syscall_post_impl_removexattr(long res, long path, long name); +void __sanitizer_syscall_pre_impl_lremovexattr(long path, long name); +void __sanitizer_syscall_post_impl_lremovexattr(long res, long path, long name); +void __sanitizer_syscall_pre_impl_fremovexattr(long fd, long name); +void __sanitizer_syscall_post_impl_fremovexattr(long res, long fd, long name); +void __sanitizer_syscall_pre_impl_brk(long brk); +void __sanitizer_syscall_post_impl_brk(long res, long brk); +void __sanitizer_syscall_pre_impl_mprotect(long start, long len, long prot); +void __sanitizer_syscall_post_impl_mprotect(long res, long start, long len, + long prot); +void __sanitizer_syscall_pre_impl_mremap(long addr, long old_len, long new_len, + long flags, long new_addr); +void __sanitizer_syscall_post_impl_mremap(long res, long addr, long old_len, + long new_len, long flags, + long new_addr); +void __sanitizer_syscall_pre_impl_remap_file_pages(long start, long size, + long prot, long pgoff, + long flags); +void __sanitizer_syscall_post_impl_remap_file_pages(long res, long start, + long size, long prot, + long pgoff, long flags); +void __sanitizer_syscall_pre_impl_msync(long start, long len, long flags); +void __sanitizer_syscall_post_impl_msync(long res, long start, long len, + long flags); +void __sanitizer_syscall_pre_impl_munmap(long addr, long len); +void __sanitizer_syscall_post_impl_munmap(long res, long addr, long len); +void __sanitizer_syscall_pre_impl_mlock(long start, long 
len); +void __sanitizer_syscall_post_impl_mlock(long res, long start, long len); +void __sanitizer_syscall_pre_impl_munlock(long start, long len); +void __sanitizer_syscall_post_impl_munlock(long res, long start, long len); +void __sanitizer_syscall_pre_impl_mlockall(long flags); +void __sanitizer_syscall_post_impl_mlockall(long res, long flags); +void __sanitizer_syscall_pre_impl_munlockall(); +void __sanitizer_syscall_post_impl_munlockall(long res); +void __sanitizer_syscall_pre_impl_madvise(long start, long len, long behavior); +void __sanitizer_syscall_post_impl_madvise(long res, long start, long len, + long behavior); +void __sanitizer_syscall_pre_impl_mincore(long start, long len, long vec); +void __sanitizer_syscall_post_impl_mincore(long res, long start, long len, + long vec); +void __sanitizer_syscall_pre_impl_pivot_root(long new_root, long put_old); +void __sanitizer_syscall_post_impl_pivot_root(long res, long new_root, + long put_old); +void __sanitizer_syscall_pre_impl_chroot(long filename); +void __sanitizer_syscall_post_impl_chroot(long res, long filename); +void __sanitizer_syscall_pre_impl_mknod(long filename, long mode, long dev); +void __sanitizer_syscall_post_impl_mknod(long res, long filename, long mode, + long dev); +void __sanitizer_syscall_pre_impl_link(long oldname, long newname); +void __sanitizer_syscall_post_impl_link(long res, long oldname, long newname); +void __sanitizer_syscall_pre_impl_symlink(long old, long new_); +void __sanitizer_syscall_post_impl_symlink(long res, long old, long new_); +void __sanitizer_syscall_pre_impl_unlink(long pathname); +void __sanitizer_syscall_post_impl_unlink(long res, long pathname); +void __sanitizer_syscall_pre_impl_rename(long oldname, long newname); +void __sanitizer_syscall_post_impl_rename(long res, long oldname, long newname); +void __sanitizer_syscall_pre_impl_chmod(long filename, long mode); +void __sanitizer_syscall_post_impl_chmod(long res, long filename, long mode); +void 
__sanitizer_syscall_pre_impl_fchmod(long fd, long mode); +void __sanitizer_syscall_post_impl_fchmod(long res, long fd, long mode); +void __sanitizer_syscall_pre_impl_fcntl(long fd, long cmd, long arg); +void __sanitizer_syscall_post_impl_fcntl(long res, long fd, long cmd, long arg); +void __sanitizer_syscall_pre_impl_fcntl64(long fd, long cmd, long arg); +void __sanitizer_syscall_post_impl_fcntl64(long res, long fd, long cmd, + long arg); +void __sanitizer_syscall_pre_impl_pipe(long fildes); +void __sanitizer_syscall_post_impl_pipe(long res, long fildes); +void __sanitizer_syscall_pre_impl_pipe2(long fildes, long flags); +void __sanitizer_syscall_post_impl_pipe2(long res, long fildes, long flags); +void __sanitizer_syscall_pre_impl_dup(long fildes); +void __sanitizer_syscall_post_impl_dup(long res, long fildes); +void __sanitizer_syscall_pre_impl_dup2(long oldfd, long newfd); +void __sanitizer_syscall_post_impl_dup2(long res, long oldfd, long newfd); +void __sanitizer_syscall_pre_impl_dup3(long oldfd, long newfd, long flags); +void __sanitizer_syscall_post_impl_dup3(long res, long oldfd, long newfd, + long flags); +void __sanitizer_syscall_pre_impl_ioperm(long from, long num, long on); +void __sanitizer_syscall_post_impl_ioperm(long res, long from, long num, + long on); +void __sanitizer_syscall_pre_impl_ioctl(long fd, long cmd, long arg); +void __sanitizer_syscall_post_impl_ioctl(long res, long fd, long cmd, long arg); +void __sanitizer_syscall_pre_impl_flock(long fd, long cmd); +void __sanitizer_syscall_post_impl_flock(long res, long fd, long cmd); +void __sanitizer_syscall_pre_impl_io_setup(long nr_reqs, long ctx); +void __sanitizer_syscall_post_impl_io_setup(long res, long nr_reqs, long ctx); +void __sanitizer_syscall_pre_impl_io_destroy(long ctx); +void __sanitizer_syscall_post_impl_io_destroy(long res, long ctx); +void __sanitizer_syscall_pre_impl_io_getevents(long ctx_id, long min_nr, + long nr, long events, + long timeout); +void 
__sanitizer_syscall_post_impl_io_getevents(long res, long ctx_id, + long min_nr, long nr, + long events, long timeout); +void __sanitizer_syscall_pre_impl_io_submit(long ctx_id, long arg1, long arg2); +void __sanitizer_syscall_post_impl_io_submit(long res, long ctx_id, long arg1, + long arg2); +void __sanitizer_syscall_pre_impl_io_cancel(long ctx_id, long iocb, + long result); +void __sanitizer_syscall_post_impl_io_cancel(long res, long ctx_id, long iocb, + long result); +void __sanitizer_syscall_pre_impl_sendfile(long out_fd, long in_fd, long offset, + long count); +void __sanitizer_syscall_post_impl_sendfile(long res, long out_fd, long in_fd, + long offset, long count); +void __sanitizer_syscall_pre_impl_sendfile64(long out_fd, long in_fd, + long offset, long count); +void __sanitizer_syscall_post_impl_sendfile64(long res, long out_fd, long in_fd, + long offset, long count); +void __sanitizer_syscall_pre_impl_readlink(long path, long buf, long bufsiz); +void __sanitizer_syscall_post_impl_readlink(long res, long path, long buf, + long bufsiz); +void __sanitizer_syscall_pre_impl_creat(long pathname, long mode); +void __sanitizer_syscall_post_impl_creat(long res, long pathname, long mode); +void __sanitizer_syscall_pre_impl_open(long filename, long flags, long mode); +void __sanitizer_syscall_post_impl_open(long res, long filename, long flags, + long mode); +void __sanitizer_syscall_pre_impl_close(long fd); +void __sanitizer_syscall_post_impl_close(long res, long fd); +void __sanitizer_syscall_pre_impl_access(long filename, long mode); +void __sanitizer_syscall_post_impl_access(long res, long filename, long mode); +void __sanitizer_syscall_pre_impl_vhangup(); +void __sanitizer_syscall_post_impl_vhangup(long res); +void __sanitizer_syscall_pre_impl_chown(long filename, long user, long group); +void __sanitizer_syscall_post_impl_chown(long res, long filename, long user, + long group); +void __sanitizer_syscall_pre_impl_lchown(long filename, long user, long group); 
+void __sanitizer_syscall_post_impl_lchown(long res, long filename, long user, + long group); +void __sanitizer_syscall_pre_impl_fchown(long fd, long user, long group); +void __sanitizer_syscall_post_impl_fchown(long res, long fd, long user, + long group); +void __sanitizer_syscall_pre_impl_chown16(long filename, long user, long group); +void __sanitizer_syscall_post_impl_chown16(long res, long filename, long user, + long group); +void __sanitizer_syscall_pre_impl_lchown16(long filename, long user, + long group); +void __sanitizer_syscall_post_impl_lchown16(long res, long filename, long user, + long group); +void __sanitizer_syscall_pre_impl_fchown16(long fd, long user, long group); +void __sanitizer_syscall_post_impl_fchown16(long res, long fd, long user, + long group); +void __sanitizer_syscall_pre_impl_setregid16(long rgid, long egid); +void __sanitizer_syscall_post_impl_setregid16(long res, long rgid, long egid); +void __sanitizer_syscall_pre_impl_setgid16(long gid); +void __sanitizer_syscall_post_impl_setgid16(long res, long gid); +void __sanitizer_syscall_pre_impl_setreuid16(long ruid, long euid); +void __sanitizer_syscall_post_impl_setreuid16(long res, long ruid, long euid); +void __sanitizer_syscall_pre_impl_setuid16(long uid); +void __sanitizer_syscall_post_impl_setuid16(long res, long uid); +void __sanitizer_syscall_pre_impl_setresuid16(long ruid, long euid, long suid); +void __sanitizer_syscall_post_impl_setresuid16(long res, long ruid, long euid, + long suid); +void __sanitizer_syscall_pre_impl_getresuid16(long ruid, long euid, long suid); +void __sanitizer_syscall_post_impl_getresuid16(long res, long ruid, long euid, + long suid); +void __sanitizer_syscall_pre_impl_setresgid16(long rgid, long egid, long sgid); +void __sanitizer_syscall_post_impl_setresgid16(long res, long rgid, long egid, + long sgid); +void __sanitizer_syscall_pre_impl_getresgid16(long rgid, long egid, long sgid); +void __sanitizer_syscall_post_impl_getresgid16(long res, long rgid, 
long egid, + long sgid); +void __sanitizer_syscall_pre_impl_setfsuid16(long uid); +void __sanitizer_syscall_post_impl_setfsuid16(long res, long uid); +void __sanitizer_syscall_pre_impl_setfsgid16(long gid); +void __sanitizer_syscall_post_impl_setfsgid16(long res, long gid); +void __sanitizer_syscall_pre_impl_getgroups16(long gidsetsize, long grouplist); +void __sanitizer_syscall_post_impl_getgroups16(long res, long gidsetsize, + long grouplist); +void __sanitizer_syscall_pre_impl_setgroups16(long gidsetsize, long grouplist); +void __sanitizer_syscall_post_impl_setgroups16(long res, long gidsetsize, + long grouplist); +void __sanitizer_syscall_pre_impl_getuid16(); +void __sanitizer_syscall_post_impl_getuid16(long res); +void __sanitizer_syscall_pre_impl_geteuid16(); +void __sanitizer_syscall_post_impl_geteuid16(long res); +void __sanitizer_syscall_pre_impl_getgid16(); +void __sanitizer_syscall_post_impl_getgid16(long res); +void __sanitizer_syscall_pre_impl_getegid16(); +void __sanitizer_syscall_post_impl_getegid16(long res); +void __sanitizer_syscall_pre_impl_utime(long filename, long times); +void __sanitizer_syscall_post_impl_utime(long res, long filename, long times); +void __sanitizer_syscall_pre_impl_utimes(long filename, long utimes); +void __sanitizer_syscall_post_impl_utimes(long res, long filename, long utimes); +void __sanitizer_syscall_pre_impl_lseek(long fd, long offset, long origin); +void __sanitizer_syscall_post_impl_lseek(long res, long fd, long offset, + long origin); +void __sanitizer_syscall_pre_impl_llseek(long fd, long offset_high, + long offset_low, long result, + long origin); +void __sanitizer_syscall_post_impl_llseek(long res, long fd, long offset_high, + long offset_low, long result, + long origin); +void __sanitizer_syscall_pre_impl_read(long fd, long buf, long count); +void __sanitizer_syscall_post_impl_read(long res, long fd, long buf, + long count); +void __sanitizer_syscall_pre_impl_readv(long fd, long vec, long vlen); +void 
__sanitizer_syscall_post_impl_readv(long res, long fd, long vec, + long vlen); +void __sanitizer_syscall_pre_impl_write(long fd, long buf, long count); +void __sanitizer_syscall_post_impl_write(long res, long fd, long buf, + long count); +void __sanitizer_syscall_pre_impl_writev(long fd, long vec, long vlen); +void __sanitizer_syscall_post_impl_writev(long res, long fd, long vec, + long vlen); + +#ifdef _LP64 +void __sanitizer_syscall_pre_impl_pread64(long fd, long buf, long count, + long pos); +void __sanitizer_syscall_post_impl_pread64(long res, long fd, long buf, + long count, long pos); +void __sanitizer_syscall_pre_impl_pwrite64(long fd, long buf, long count, + long pos); +void __sanitizer_syscall_post_impl_pwrite64(long res, long fd, long buf, + long count, long pos); +#else +void __sanitizer_syscall_pre_impl_pread64(long fd, long buf, long count, + long pos0, long pos1); +void __sanitizer_syscall_post_impl_pread64(long res, long fd, long buf, + long count, long pos0, long pos1); +void __sanitizer_syscall_pre_impl_pwrite64(long fd, long buf, long count, + long pos0, long pos1); +void __sanitizer_syscall_post_impl_pwrite64(long res, long fd, long buf, + long count, long pos0, long pos1); +#endif + +void __sanitizer_syscall_pre_impl_preadv(long fd, long vec, long vlen, + long pos_l, long pos_h); +void __sanitizer_syscall_post_impl_preadv(long res, long fd, long vec, + long vlen, long pos_l, long pos_h); +void __sanitizer_syscall_pre_impl_pwritev(long fd, long vec, long vlen, + long pos_l, long pos_h); +void __sanitizer_syscall_post_impl_pwritev(long res, long fd, long vec, + long vlen, long pos_l, long pos_h); +void __sanitizer_syscall_pre_impl_getcwd(long buf, long size); +void __sanitizer_syscall_post_impl_getcwd(long res, long buf, long size); +void __sanitizer_syscall_pre_impl_mkdir(long pathname, long mode); +void __sanitizer_syscall_post_impl_mkdir(long res, long pathname, long mode); +void __sanitizer_syscall_pre_impl_chdir(long filename); +void 
__sanitizer_syscall_post_impl_chdir(long res, long filename); +void __sanitizer_syscall_pre_impl_fchdir(long fd); +void __sanitizer_syscall_post_impl_fchdir(long res, long fd); +void __sanitizer_syscall_pre_impl_rmdir(long pathname); +void __sanitizer_syscall_post_impl_rmdir(long res, long pathname); +void __sanitizer_syscall_pre_impl_lookup_dcookie(long cookie64, long buf, + long len); +void __sanitizer_syscall_post_impl_lookup_dcookie(long res, long cookie64, + long buf, long len); +void __sanitizer_syscall_pre_impl_quotactl(long cmd, long special, long id, + long addr); +void __sanitizer_syscall_post_impl_quotactl(long res, long cmd, long special, + long id, long addr); +void __sanitizer_syscall_pre_impl_getdents(long fd, long dirent, long count); +void __sanitizer_syscall_post_impl_getdents(long res, long fd, long dirent, + long count); +void __sanitizer_syscall_pre_impl_getdents64(long fd, long dirent, long count); +void __sanitizer_syscall_post_impl_getdents64(long res, long fd, long dirent, + long count); +void __sanitizer_syscall_pre_impl_setsockopt(long fd, long level, long optname, + long optval, long optlen); +void __sanitizer_syscall_post_impl_setsockopt(long res, long fd, long level, + long optname, long optval, + long optlen); +void __sanitizer_syscall_pre_impl_getsockopt(long fd, long level, long optname, + long optval, long optlen); +void __sanitizer_syscall_post_impl_getsockopt(long res, long fd, long level, + long optname, long optval, + long optlen); +void __sanitizer_syscall_pre_impl_bind(long arg0, long arg1, long arg2); +void __sanitizer_syscall_post_impl_bind(long res, long arg0, long arg1, + long arg2); +void __sanitizer_syscall_pre_impl_connect(long arg0, long arg1, long arg2); +void __sanitizer_syscall_post_impl_connect(long res, long arg0, long arg1, + long arg2); +void __sanitizer_syscall_pre_impl_accept(long arg0, long arg1, long arg2); +void __sanitizer_syscall_post_impl_accept(long res, long arg0, long arg1, + long arg2); +void 
__sanitizer_syscall_pre_impl_accept4(long arg0, long arg1, long arg2, + long arg3); +void __sanitizer_syscall_post_impl_accept4(long res, long arg0, long arg1, + long arg2, long arg3); +void __sanitizer_syscall_pre_impl_getsockname(long arg0, long arg1, long arg2); +void __sanitizer_syscall_post_impl_getsockname(long res, long arg0, long arg1, + long arg2); +void __sanitizer_syscall_pre_impl_getpeername(long arg0, long arg1, long arg2); +void __sanitizer_syscall_post_impl_getpeername(long res, long arg0, long arg1, + long arg2); +void __sanitizer_syscall_pre_impl_send(long arg0, long arg1, long arg2, + long arg3); +void __sanitizer_syscall_post_impl_send(long res, long arg0, long arg1, + long arg2, long arg3); +void __sanitizer_syscall_pre_impl_sendto(long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5); +void __sanitizer_syscall_post_impl_sendto(long res, long arg0, long arg1, + long arg2, long arg3, long arg4, + long arg5); +void __sanitizer_syscall_pre_impl_sendmsg(long fd, long msg, long flags); +void __sanitizer_syscall_post_impl_sendmsg(long res, long fd, long msg, + long flags); +void __sanitizer_syscall_pre_impl_sendmmsg(long fd, long msg, long vlen, + long flags); +void __sanitizer_syscall_post_impl_sendmmsg(long res, long fd, long msg, + long vlen, long flags); +void __sanitizer_syscall_pre_impl_recv(long arg0, long arg1, long arg2, + long arg3); +void __sanitizer_syscall_post_impl_recv(long res, long arg0, long arg1, + long arg2, long arg3); +void __sanitizer_syscall_pre_impl_recvfrom(long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5); +void __sanitizer_syscall_post_impl_recvfrom(long res, long arg0, long arg1, + long arg2, long arg3, long arg4, + long arg5); +void __sanitizer_syscall_pre_impl_recvmsg(long fd, long msg, long flags); +void __sanitizer_syscall_post_impl_recvmsg(long res, long fd, long msg, + long flags); +void __sanitizer_syscall_pre_impl_recvmmsg(long fd, long msg, long vlen, + long flags, long timeout); 
+void __sanitizer_syscall_post_impl_recvmmsg(long res, long fd, long msg, + long vlen, long flags, + long timeout); +void __sanitizer_syscall_pre_impl_socket(long arg0, long arg1, long arg2); +void __sanitizer_syscall_post_impl_socket(long res, long arg0, long arg1, + long arg2); +void __sanitizer_syscall_pre_impl_socketpair(long arg0, long arg1, long arg2, + long arg3); +void __sanitizer_syscall_post_impl_socketpair(long res, long arg0, long arg1, + long arg2, long arg3); +void __sanitizer_syscall_pre_impl_socketcall(long call, long args); +void __sanitizer_syscall_post_impl_socketcall(long res, long call, long args); +void __sanitizer_syscall_pre_impl_listen(long arg0, long arg1); +void __sanitizer_syscall_post_impl_listen(long res, long arg0, long arg1); +void __sanitizer_syscall_pre_impl_poll(long ufds, long nfds, long timeout); +void __sanitizer_syscall_post_impl_poll(long res, long ufds, long nfds, + long timeout); +void __sanitizer_syscall_pre_impl_select(long n, long inp, long outp, long exp, + long tvp); +void __sanitizer_syscall_post_impl_select(long res, long n, long inp, long outp, + long exp, long tvp); +void __sanitizer_syscall_pre_impl_old_select(long arg); +void __sanitizer_syscall_post_impl_old_select(long res, long arg); +void __sanitizer_syscall_pre_impl_epoll_create(long size); +void __sanitizer_syscall_post_impl_epoll_create(long res, long size); +void __sanitizer_syscall_pre_impl_epoll_create1(long flags); +void __sanitizer_syscall_post_impl_epoll_create1(long res, long flags); +void __sanitizer_syscall_pre_impl_epoll_ctl(long epfd, long op, long fd, + long event); +void __sanitizer_syscall_post_impl_epoll_ctl(long res, long epfd, long op, + long fd, long event); +void __sanitizer_syscall_pre_impl_epoll_wait(long epfd, long events, + long maxevents, long timeout); +void __sanitizer_syscall_post_impl_epoll_wait(long res, long epfd, long events, + long maxevents, long timeout); +void __sanitizer_syscall_pre_impl_epoll_pwait(long epfd, long 
events, + long maxevents, long timeout, + long sigmask, long sigsetsize); +void __sanitizer_syscall_post_impl_epoll_pwait(long res, long epfd, long events, + long maxevents, long timeout, + long sigmask, long sigsetsize); +void __sanitizer_syscall_pre_impl_gethostname(long name, long len); +void __sanitizer_syscall_post_impl_gethostname(long res, long name, long len); +void __sanitizer_syscall_pre_impl_sethostname(long name, long len); +void __sanitizer_syscall_post_impl_sethostname(long res, long name, long len); +void __sanitizer_syscall_pre_impl_setdomainname(long name, long len); +void __sanitizer_syscall_post_impl_setdomainname(long res, long name, long len); +void __sanitizer_syscall_pre_impl_newuname(long name); +void __sanitizer_syscall_post_impl_newuname(long res, long name); +void __sanitizer_syscall_pre_impl_uname(long arg0); +void __sanitizer_syscall_post_impl_uname(long res, long arg0); +void __sanitizer_syscall_pre_impl_olduname(long arg0); +void __sanitizer_syscall_post_impl_olduname(long res, long arg0); +void __sanitizer_syscall_pre_impl_getrlimit(long resource, long rlim); +void __sanitizer_syscall_post_impl_getrlimit(long res, long resource, + long rlim); +void __sanitizer_syscall_pre_impl_old_getrlimit(long resource, long rlim); +void __sanitizer_syscall_post_impl_old_getrlimit(long res, long resource, + long rlim); +void __sanitizer_syscall_pre_impl_setrlimit(long resource, long rlim); +void __sanitizer_syscall_post_impl_setrlimit(long res, long resource, + long rlim); +void __sanitizer_syscall_pre_impl_prlimit64(long pid, long resource, + long new_rlim, long old_rlim); +void __sanitizer_syscall_post_impl_prlimit64(long res, long pid, long resource, + long new_rlim, long old_rlim); +void __sanitizer_syscall_pre_impl_getrusage(long who, long ru); +void __sanitizer_syscall_post_impl_getrusage(long res, long who, long ru); +void __sanitizer_syscall_pre_impl_umask(long mask); +void __sanitizer_syscall_post_impl_umask(long res, long mask); +void 
__sanitizer_syscall_pre_impl_msgget(long key, long msgflg); +void __sanitizer_syscall_post_impl_msgget(long res, long key, long msgflg); +void __sanitizer_syscall_pre_impl_msgsnd(long msqid, long msgp, long msgsz, + long msgflg); +void __sanitizer_syscall_post_impl_msgsnd(long res, long msqid, long msgp, + long msgsz, long msgflg); +void __sanitizer_syscall_pre_impl_msgrcv(long msqid, long msgp, long msgsz, + long msgtyp, long msgflg); +void __sanitizer_syscall_post_impl_msgrcv(long res, long msqid, long msgp, + long msgsz, long msgtyp, long msgflg); +void __sanitizer_syscall_pre_impl_msgctl(long msqid, long cmd, long buf); +void __sanitizer_syscall_post_impl_msgctl(long res, long msqid, long cmd, + long buf); +void __sanitizer_syscall_pre_impl_semget(long key, long nsems, long semflg); +void __sanitizer_syscall_post_impl_semget(long res, long key, long nsems, + long semflg); +void __sanitizer_syscall_pre_impl_semop(long semid, long sops, long nsops); +void __sanitizer_syscall_post_impl_semop(long res, long semid, long sops, + long nsops); +void __sanitizer_syscall_pre_impl_semctl(long semid, long semnum, long cmd, + long arg); +void __sanitizer_syscall_post_impl_semctl(long res, long semid, long semnum, + long cmd, long arg); +void __sanitizer_syscall_pre_impl_semtimedop(long semid, long sops, long nsops, + long timeout); +void __sanitizer_syscall_post_impl_semtimedop(long res, long semid, long sops, + long nsops, long timeout); +void __sanitizer_syscall_pre_impl_shmat(long shmid, long shmaddr, long shmflg); +void __sanitizer_syscall_post_impl_shmat(long res, long shmid, long shmaddr, + long shmflg); +void __sanitizer_syscall_pre_impl_shmget(long key, long size, long flag); +void __sanitizer_syscall_post_impl_shmget(long res, long key, long size, + long flag); +void __sanitizer_syscall_pre_impl_shmdt(long shmaddr); +void __sanitizer_syscall_post_impl_shmdt(long res, long shmaddr); +void __sanitizer_syscall_pre_impl_shmctl(long shmid, long cmd, long buf); +void 
__sanitizer_syscall_post_impl_shmctl(long res, long shmid, long cmd, + long buf); +void __sanitizer_syscall_pre_impl_ipc(long call, long first, long second, + long third, long ptr, long fifth); +void __sanitizer_syscall_post_impl_ipc(long res, long call, long first, + long second, long third, long ptr, + long fifth); +void __sanitizer_syscall_pre_impl_mq_open(long name, long oflag, long mode, + long attr); +void __sanitizer_syscall_post_impl_mq_open(long res, long name, long oflag, + long mode, long attr); +void __sanitizer_syscall_pre_impl_mq_unlink(long name); +void __sanitizer_syscall_post_impl_mq_unlink(long res, long name); +void __sanitizer_syscall_pre_impl_mq_timedsend(long mqdes, long msg_ptr, + long msg_len, long msg_prio, + long abs_timeout); +void __sanitizer_syscall_post_impl_mq_timedsend(long res, long mqdes, + long msg_ptr, long msg_len, + long msg_prio, + long abs_timeout); +void __sanitizer_syscall_pre_impl_mq_timedreceive(long mqdes, long msg_ptr, + long msg_len, long msg_prio, + long abs_timeout); +void __sanitizer_syscall_post_impl_mq_timedreceive(long res, long mqdes, + long msg_ptr, long msg_len, + long msg_prio, + long abs_timeout); +void __sanitizer_syscall_pre_impl_mq_notify(long mqdes, long notification); +void __sanitizer_syscall_post_impl_mq_notify(long res, long mqdes, + long notification); +void __sanitizer_syscall_pre_impl_mq_getsetattr(long mqdes, long mqstat, + long omqstat); +void __sanitizer_syscall_post_impl_mq_getsetattr(long res, long mqdes, + long mqstat, long omqstat); +void __sanitizer_syscall_pre_impl_pciconfig_iobase(long which, long bus, + long devfn); +void __sanitizer_syscall_post_impl_pciconfig_iobase(long res, long which, + long bus, long devfn); +void __sanitizer_syscall_pre_impl_pciconfig_read(long bus, long dfn, long off, + long len, long buf); +void __sanitizer_syscall_post_impl_pciconfig_read(long res, long bus, long dfn, + long off, long len, long buf); +void __sanitizer_syscall_pre_impl_pciconfig_write(long bus, 
long dfn, long off, + long len, long buf); +void __sanitizer_syscall_post_impl_pciconfig_write(long res, long bus, long dfn, + long off, long len, + long buf); +void __sanitizer_syscall_pre_impl_swapon(long specialfile, long swap_flags); +void __sanitizer_syscall_post_impl_swapon(long res, long specialfile, + long swap_flags); +void __sanitizer_syscall_pre_impl_swapoff(long specialfile); +void __sanitizer_syscall_post_impl_swapoff(long res, long specialfile); +void __sanitizer_syscall_pre_impl_sysctl(long args); +void __sanitizer_syscall_post_impl_sysctl(long res, long args); +void __sanitizer_syscall_pre_impl_sysinfo(long info); +void __sanitizer_syscall_post_impl_sysinfo(long res, long info); +void __sanitizer_syscall_pre_impl_sysfs(long option, long arg1, long arg2); +void __sanitizer_syscall_post_impl_sysfs(long res, long option, long arg1, + long arg2); +void __sanitizer_syscall_pre_impl_syslog(long type, long buf, long len); +void __sanitizer_syscall_post_impl_syslog(long res, long type, long buf, + long len); +void __sanitizer_syscall_pre_impl_uselib(long library); +void __sanitizer_syscall_post_impl_uselib(long res, long library); +void __sanitizer_syscall_pre_impl_ni_syscall(); +void __sanitizer_syscall_post_impl_ni_syscall(long res); +void __sanitizer_syscall_pre_impl_ptrace(long request, long pid, long addr, + long data); +void __sanitizer_syscall_post_impl_ptrace(long res, long request, long pid, + long addr, long data); +void __sanitizer_syscall_pre_impl_add_key(long _type, long _description, + long _payload, long plen, + long destringid); +void __sanitizer_syscall_post_impl_add_key(long res, long _type, + long _description, long _payload, + long plen, long destringid); +void __sanitizer_syscall_pre_impl_request_key(long _type, long _description, + long _callout_info, + long destringid); +void __sanitizer_syscall_post_impl_request_key(long res, long _type, + long _description, + long _callout_info, + long destringid); +void 
__sanitizer_syscall_pre_impl_keyctl(long cmd, long arg2, long arg3, + long arg4, long arg5); +void __sanitizer_syscall_post_impl_keyctl(long res, long cmd, long arg2, + long arg3, long arg4, long arg5); +void __sanitizer_syscall_pre_impl_ioprio_set(long which, long who, long ioprio); +void __sanitizer_syscall_post_impl_ioprio_set(long res, long which, long who, + long ioprio); +void __sanitizer_syscall_pre_impl_ioprio_get(long which, long who); +void __sanitizer_syscall_post_impl_ioprio_get(long res, long which, long who); +void __sanitizer_syscall_pre_impl_set_mempolicy(long mode, long nmask, + long maxnode); +void __sanitizer_syscall_post_impl_set_mempolicy(long res, long mode, + long nmask, long maxnode); +void __sanitizer_syscall_pre_impl_migrate_pages(long pid, long maxnode, + long from, long to); +void __sanitizer_syscall_post_impl_migrate_pages(long res, long pid, + long maxnode, long from, + long to); +void __sanitizer_syscall_pre_impl_move_pages(long pid, long nr_pages, + long pages, long nodes, + long status, long flags); +void __sanitizer_syscall_post_impl_move_pages(long res, long pid, long nr_pages, + long pages, long nodes, + long status, long flags); +void __sanitizer_syscall_pre_impl_mbind(long start, long len, long mode, + long nmask, long maxnode, long flags); +void __sanitizer_syscall_post_impl_mbind(long res, long start, long len, + long mode, long nmask, long maxnode, + long flags); +void __sanitizer_syscall_pre_impl_get_mempolicy(long policy, long nmask, + long maxnode, long addr, + long flags); +void __sanitizer_syscall_post_impl_get_mempolicy(long res, long policy, + long nmask, long maxnode, + long addr, long flags); +void __sanitizer_syscall_pre_impl_inotify_init(); +void __sanitizer_syscall_post_impl_inotify_init(long res); +void __sanitizer_syscall_pre_impl_inotify_init1(long flags); +void __sanitizer_syscall_post_impl_inotify_init1(long res, long flags); +void __sanitizer_syscall_pre_impl_inotify_add_watch(long fd, long path, + long 
mask); +void __sanitizer_syscall_post_impl_inotify_add_watch(long res, long fd, + long path, long mask); +void __sanitizer_syscall_pre_impl_inotify_rm_watch(long fd, long wd); +void __sanitizer_syscall_post_impl_inotify_rm_watch(long res, long fd, long wd); +void __sanitizer_syscall_pre_impl_spu_run(long fd, long unpc, long ustatus); +void __sanitizer_syscall_post_impl_spu_run(long res, long fd, long unpc, + long ustatus); +void __sanitizer_syscall_pre_impl_spu_create(long name, long flags, long mode, + long fd); +void __sanitizer_syscall_post_impl_spu_create(long res, long name, long flags, + long mode, long fd); +void __sanitizer_syscall_pre_impl_mknodat(long dfd, long filename, long mode, + long dev); +void __sanitizer_syscall_post_impl_mknodat(long res, long dfd, long filename, + long mode, long dev); +void __sanitizer_syscall_pre_impl_mkdirat(long dfd, long pathname, long mode); +void __sanitizer_syscall_post_impl_mkdirat(long res, long dfd, long pathname, + long mode); +void __sanitizer_syscall_pre_impl_unlinkat(long dfd, long pathname, long flag); +void __sanitizer_syscall_post_impl_unlinkat(long res, long dfd, long pathname, + long flag); +void __sanitizer_syscall_pre_impl_symlinkat(long oldname, long newdfd, + long newname); +void __sanitizer_syscall_post_impl_symlinkat(long res, long oldname, + long newdfd, long newname); +void __sanitizer_syscall_pre_impl_linkat(long olddfd, long oldname, long newdfd, + long newname, long flags); +void __sanitizer_syscall_post_impl_linkat(long res, long olddfd, long oldname, + long newdfd, long newname, + long flags); +void __sanitizer_syscall_pre_impl_renameat(long olddfd, long oldname, + long newdfd, long newname); +void __sanitizer_syscall_post_impl_renameat(long res, long olddfd, long oldname, + long newdfd, long newname); +void __sanitizer_syscall_pre_impl_futimesat(long dfd, long filename, + long utimes); +void __sanitizer_syscall_post_impl_futimesat(long res, long dfd, long filename, + long utimes); +void 
__sanitizer_syscall_pre_impl_faccessat(long dfd, long filename, long mode); +void __sanitizer_syscall_post_impl_faccessat(long res, long dfd, long filename, + long mode); +void __sanitizer_syscall_pre_impl_fchmodat(long dfd, long filename, long mode); +void __sanitizer_syscall_post_impl_fchmodat(long res, long dfd, long filename, + long mode); +void __sanitizer_syscall_pre_impl_fchownat(long dfd, long filename, long user, + long group, long flag); +void __sanitizer_syscall_post_impl_fchownat(long res, long dfd, long filename, + long user, long group, long flag); +void __sanitizer_syscall_pre_impl_openat(long dfd, long filename, long flags, + long mode); +void __sanitizer_syscall_post_impl_openat(long res, long dfd, long filename, + long flags, long mode); +void __sanitizer_syscall_pre_impl_newfstatat(long dfd, long filename, + long statbuf, long flag); +void __sanitizer_syscall_post_impl_newfstatat(long res, long dfd, long filename, + long statbuf, long flag); +void __sanitizer_syscall_pre_impl_fstatat64(long dfd, long filename, + long statbuf, long flag); +void __sanitizer_syscall_post_impl_fstatat64(long res, long dfd, long filename, + long statbuf, long flag); +void __sanitizer_syscall_pre_impl_readlinkat(long dfd, long path, long buf, + long bufsiz); +void __sanitizer_syscall_post_impl_readlinkat(long res, long dfd, long path, + long buf, long bufsiz); +void __sanitizer_syscall_pre_impl_utimensat(long dfd, long filename, + long utimes, long flags); +void __sanitizer_syscall_post_impl_utimensat(long res, long dfd, long filename, + long utimes, long flags); +void __sanitizer_syscall_pre_impl_unshare(long unshare_flags); +void __sanitizer_syscall_post_impl_unshare(long res, long unshare_flags); +void __sanitizer_syscall_pre_impl_splice(long fd_in, long off_in, long fd_out, + long off_out, long len, long flags); +void __sanitizer_syscall_post_impl_splice(long res, long fd_in, long off_in, + long fd_out, long off_out, long len, + long flags); +void 
__sanitizer_syscall_pre_impl_vmsplice(long fd, long iov, long nr_segs, + long flags); +void __sanitizer_syscall_post_impl_vmsplice(long res, long fd, long iov, + long nr_segs, long flags); +void __sanitizer_syscall_pre_impl_tee(long fdin, long fdout, long len, + long flags); +void __sanitizer_syscall_post_impl_tee(long res, long fdin, long fdout, + long len, long flags); +void __sanitizer_syscall_pre_impl_get_robust_list(long pid, long head_ptr, + long len_ptr); +void __sanitizer_syscall_post_impl_get_robust_list(long res, long pid, + long head_ptr, long len_ptr); +void __sanitizer_syscall_pre_impl_set_robust_list(long head, long len); +void __sanitizer_syscall_post_impl_set_robust_list(long res, long head, + long len); +void __sanitizer_syscall_pre_impl_getcpu(long cpu, long node, long cache); +void __sanitizer_syscall_post_impl_getcpu(long res, long cpu, long node, + long cache); +void __sanitizer_syscall_pre_impl_signalfd(long ufd, long user_mask, + long sizemask); +void __sanitizer_syscall_post_impl_signalfd(long res, long ufd, long user_mask, + long sizemask); +void __sanitizer_syscall_pre_impl_signalfd4(long ufd, long user_mask, + long sizemask, long flags); +void __sanitizer_syscall_post_impl_signalfd4(long res, long ufd, long user_mask, + long sizemask, long flags); +void __sanitizer_syscall_pre_impl_timerfd_create(long clockid, long flags); +void __sanitizer_syscall_post_impl_timerfd_create(long res, long clockid, + long flags); +void __sanitizer_syscall_pre_impl_timerfd_settime(long ufd, long flags, + long utmr, long otmr); +void __sanitizer_syscall_post_impl_timerfd_settime(long res, long ufd, + long flags, long utmr, + long otmr); +void __sanitizer_syscall_pre_impl_timerfd_gettime(long ufd, long otmr); +void __sanitizer_syscall_post_impl_timerfd_gettime(long res, long ufd, + long otmr); +void __sanitizer_syscall_pre_impl_eventfd(long count); +void __sanitizer_syscall_post_impl_eventfd(long res, long count); +void 
__sanitizer_syscall_pre_impl_eventfd2(long count, long flags); +void __sanitizer_syscall_post_impl_eventfd2(long res, long count, long flags); +void __sanitizer_syscall_pre_impl_old_readdir(long arg0, long arg1, long arg2); +void __sanitizer_syscall_post_impl_old_readdir(long res, long arg0, long arg1, + long arg2); +void __sanitizer_syscall_pre_impl_pselect6(long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5); +void __sanitizer_syscall_post_impl_pselect6(long res, long arg0, long arg1, + long arg2, long arg3, long arg4, + long arg5); +void __sanitizer_syscall_pre_impl_ppoll(long arg0, long arg1, long arg2, + long arg3, long arg4); +void __sanitizer_syscall_post_impl_ppoll(long res, long arg0, long arg1, + long arg2, long arg3, long arg4); +void __sanitizer_syscall_pre_impl_fanotify_init(long flags, long event_f_flags); +void __sanitizer_syscall_post_impl_fanotify_init(long res, long flags, + long event_f_flags); +void __sanitizer_syscall_pre_impl_fanotify_mark(long fanotify_fd, long flags, + long mask, long fd, + long pathname); +void __sanitizer_syscall_post_impl_fanotify_mark(long res, long fanotify_fd, + long flags, long mask, long fd, + long pathname); +void __sanitizer_syscall_pre_impl_syncfs(long fd); +void __sanitizer_syscall_post_impl_syncfs(long res, long fd); +void __sanitizer_syscall_pre_impl_perf_event_open(long attr_uptr, long pid, + long cpu, long group_fd, + long flags); +void __sanitizer_syscall_post_impl_perf_event_open(long res, long attr_uptr, + long pid, long cpu, + long group_fd, long flags); +void __sanitizer_syscall_pre_impl_mmap_pgoff(long addr, long len, long prot, + long flags, long fd, long pgoff); +void __sanitizer_syscall_post_impl_mmap_pgoff(long res, long addr, long len, + long prot, long flags, long fd, + long pgoff); +void __sanitizer_syscall_pre_impl_old_mmap(long arg); +void __sanitizer_syscall_post_impl_old_mmap(long res, long arg); +void __sanitizer_syscall_pre_impl_name_to_handle_at(long dfd, long name, + long 
handle, long mnt_id, + long flag); +void __sanitizer_syscall_post_impl_name_to_handle_at(long res, long dfd, + long name, long handle, + long mnt_id, long flag); +void __sanitizer_syscall_pre_impl_open_by_handle_at(long mountdirfd, + long handle, long flags); +void __sanitizer_syscall_post_impl_open_by_handle_at(long res, long mountdirfd, + long handle, long flags); +void __sanitizer_syscall_pre_impl_setns(long fd, long nstype); +void __sanitizer_syscall_post_impl_setns(long res, long fd, long nstype); +void __sanitizer_syscall_pre_impl_process_vm_readv(long pid, long lvec, + long liovcnt, long rvec, + long riovcnt, long flags); +void __sanitizer_syscall_post_impl_process_vm_readv(long res, long pid, + long lvec, long liovcnt, + long rvec, long riovcnt, + long flags); +void __sanitizer_syscall_pre_impl_process_vm_writev(long pid, long lvec, + long liovcnt, long rvec, + long riovcnt, long flags); +void __sanitizer_syscall_post_impl_process_vm_writev(long res, long pid, + long lvec, long liovcnt, + long rvec, long riovcnt, + long flags); +void __sanitizer_syscall_pre_impl_fork(); +void __sanitizer_syscall_post_impl_fork(long res); +void __sanitizer_syscall_pre_impl_vfork(); +void __sanitizer_syscall_post_impl_vfork(long res); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // SANITIZER_LINUX_SYSCALL_HOOKS_H diff --git a/python/clang/5.1/include/sanitizer/lsan_interface.h b/python/clang/5.1/include/sanitizer/lsan_interface.h new file mode 100644 index 00000000..df256c0e --- /dev/null +++ b/python/clang/5.1/include/sanitizer/lsan_interface.h @@ -0,0 +1,52 @@ +//===-- sanitizer/lsan_interface.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of LeakSanitizer. +// +// Public interface header. 
+//===----------------------------------------------------------------------===// +#ifndef SANITIZER_LSAN_INTERFACE_H +#define SANITIZER_LSAN_INTERFACE_H + +#include <sanitizer/common_interface_defs.h> + +#ifdef __cplusplus +extern "C" { +#endif + // Allocations made between calls to __lsan_disable() and __lsan_enable() will + // be treated as non-leaks. Disable/enable pairs may be nested. + void __lsan_disable(); + void __lsan_enable(); + // The heap object into which p points will be treated as a non-leak. + void __lsan_ignore_object(const void *p); + // The user may optionally provide this function to disallow leak checking + // for the program it is linked into (if the return value is non-zero). This + // function must be defined as returning a constant value; any behavior beyond + // that is unsupported. + int __lsan_is_turned_off(); + // Calling this function makes LSan enter the leak checking phase immediately. + // Use this if normal end-of-process leak checking happens too late (e.g. if + // you have intentional memory leaks in your shutdown code). Calling this + // function overrides end-of-process leak checking; it must be called at + // most once per process. This function will terminate the process if there + // are memory leaks and the exit_code flag is non-zero. 
+ void __lsan_do_leak_check(); +#ifdef __cplusplus +} // extern "C" + +namespace __lsan { +class ScopedDisabler { + public: + ScopedDisabler() { __lsan_disable(); } + ~ScopedDisabler() { __lsan_enable(); } +}; +} // namespace __lsan +#endif + +#endif // SANITIZER_LSAN_INTERFACE_H diff --git a/python/clang/5.1/include/sanitizer/msan_interface.h b/python/clang/5.1/include/sanitizer/msan_interface.h new file mode 100644 index 00000000..63af84fc --- /dev/null +++ b/python/clang/5.1/include/sanitizer/msan_interface.h @@ -0,0 +1,162 @@ +//===-- msan_interface.h --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of MemorySanitizer. +// +// Public interface header. +//===----------------------------------------------------------------------===// +#ifndef MSAN_INTERFACE_H +#define MSAN_INTERFACE_H + +#include <sanitizer/common_interface_defs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if __has_feature(memory_sanitizer) + /* Returns a string describing a stack origin. + Return NULL if the origin is invalid, or is not a stack origin. */ + const char *__msan_get_origin_descr_if_stack(uint32_t id); + + + /* Set raw origin for the memory range. */ + void __msan_set_origin(const volatile void *a, size_t size, uint32_t origin); + + /* Get raw origin for an address. */ + uint32_t __msan_get_origin(const volatile void *a); + + /* Returns non-zero if tracking origins. */ + int __msan_get_track_origins(); + + /* Returns the origin id of the latest UMR in the calling thread. */ + uint32_t __msan_get_umr_origin(); + + /* Make memory region fully initialized (without changing its contents). */ + void __msan_unpoison(const volatile void *a, size_t size); + + /* Make memory region fully uninitialized (without changing its contents). 
*/ + void __msan_poison(const volatile void *a, size_t size); + + /* Make memory region partially uninitialized (without changing its contents). + */ + void __msan_partial_poison(const volatile void *data, void *shadow, + size_t size); + + /* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ + intptr_t __msan_test_shadow(const volatile void *x, size_t size); + + /* Set exit code when error(s) were detected. + Value of 0 means don't change the program exit code. */ + void __msan_set_exit_code(int exit_code); + + /* For testing: + __msan_set_expect_umr(1); + ... some buggy code ... + __msan_set_expect_umr(0); + The last line will verify that a UMR happened. */ + void __msan_set_expect_umr(int expect_umr); + + /* Change the value of keep_going flag. Non-zero value means don't terminate + program execution when an error is detected. This will not affect error in + modules that were compiled without the corresponding compiler flag. */ + void __msan_set_keep_going(int keep_going); + + /* Print shadow and origin for the memory range to stdout in a human-readable + format. */ + void __msan_print_shadow(const volatile void *x, size_t size); + + /* Print current function arguments shadow and origin to stdout in a + human-readable format. */ + void __msan_print_param_shadow(); + + /* Returns true if running under a dynamic tool (DynamoRio-based). */ + int __msan_has_dynamic_component(); + + /* Tell MSan about newly allocated memory (ex.: custom allocator). + Memory will be marked uninitialized, with origin at the call site. */ + void __msan_allocated_memory(const volatile void* data, size_t size); + + /* This function may be optionally provided by user and should return + a string containing Msan runtime options. See msan_flags.h for details. */ + const char* __msan_default_options(); + + + /***********************************/ + /* Allocator statistics interface. 
*/ + + /* Returns the estimated number of bytes that will be reserved by allocator + for request of "size" bytes. If Msan allocator can't allocate that much + memory, returns the maximal possible allocation size, otherwise returns + "size". */ + size_t __msan_get_estimated_allocated_size(size_t size); + + /* Returns true if p was returned by the Msan allocator and + is not yet freed. */ + int __msan_get_ownership(const volatile void *p); + + /* Returns the number of bytes reserved for the pointer p. + Requires (get_ownership(p) == true) or (p == 0). */ + size_t __msan_get_allocated_size(const volatile void *p); + + /* Number of bytes, allocated and not yet freed by the application. */ + size_t __msan_get_current_allocated_bytes(); + + /* Number of bytes, mmaped by msan allocator to fulfill allocation requests. + Generally, for request of X bytes, allocator can reserve and add to free + lists a large number of chunks of size X to use them for future requests. + All these chunks count toward the heap size. Currently, allocator never + releases memory to OS (instead, it just puts freed chunks to free + lists). */ + size_t __msan_get_heap_size(); + + /* Number of bytes, mmaped by msan allocator, which can be used to fulfill + allocation requests. When a user program frees memory chunk, it can first + fall into quarantine and will count toward __msan_get_free_bytes() + later. */ + size_t __msan_get_free_bytes(); + + /* Number of bytes in unmapped pages, that are released to OS. Currently, + always returns 0. */ + size_t __msan_get_unmapped_bytes(); + + /* Malloc hooks that may be optionally provided by user. + __msan_malloc_hook(ptr, size) is called immediately after + allocation of "size" bytes, which returned "ptr". + __msan_free_hook(ptr) is called immediately before + deallocation of "ptr". 
*/ + void __msan_malloc_hook(const volatile void *ptr, size_t size); + void __msan_free_hook(const volatile void *ptr); + +#else // __has_feature(memory_sanitizer) + +#define __msan_get_origin_descr_if_stack(id) ((const char*)0) +#define __msan_set_origin(a, size, origin) +#define __msan_get_origin(a) ((uint32_t)-1) +#define __msan_get_track_origins() (0) +#define __msan_get_umr_origin() ((uint32_t)-1) +#define __msan_unpoison(a, size) +#define __msan_poison(a, size) +#define __msan_partial_poison(data, shadow, size) +#define __msan_test_shadow(x, size) ((intptr_t)-1) +#define __msan_set_exit_code(exit_code) +#define __msan_set_expect_umr(expect_umr) +#define __msan_print_shadow(x, size) +#define __msan_print_param_shadow() +#define __msan_has_dynamic_component() (0) +#define __msan_allocated_memory(data, size) + +#endif // __has_feature(memory_sanitizer) + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/python/clang/5.1/include/shaintrin.h b/python/clang/5.1/include/shaintrin.h new file mode 100644 index 00000000..66ed0554 --- /dev/null +++ b/python/clang/5.1/include/shaintrin.h @@ -0,0 +1,74 @@ +/*===---- shaintrin.h - SHA intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <shaintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __SHAINTRIN_H +#define __SHAINTRIN_H + +#if !defined (__SHA__) +# error "SHA instructions not enabled" +#endif + +#define _mm_sha1rnds4_epu32(V1, V2, M) __extension__ ({ \ + __builtin_ia32_sha1rnds4((V1), (V2), (M)); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha1nexte_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha1nexte(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha1msg1_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha1msg1(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha1msg2_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha1msg2(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z) +{ + return __builtin_ia32_sha256rnds2(__X, __Y, __Z); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha256msg1_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha256msg1(__X, __Y); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha256msg2_epu32(__m128i __X, __m128i __Y) +{ + return __builtin_ia32_sha256msg2(__X, __Y); +} + +#endif /* __SHAINTRIN_H */ 
diff --git a/python/clang/5.1/include/smmintrin.h b/python/clang/5.1/include/smmintrin.h new file mode 100644 index 00000000..53b3ccb4 --- /dev/null +++ b/python/clang/5.1/include/smmintrin.h @@ -0,0 +1,468 @@ +/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _SMMINTRIN_H +#define _SMMINTRIN_H + +#ifndef __SSE4_1__ +#error "SSE4.1 instruction set not enabled" +#else + +#include <tmmintrin.h> + +/* SSE4 Rounding macros. 
*/ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +#define _MM_FROUND_RAISE_EXC 0x00 +#define _MM_FROUND_NO_EXC 0x08 + +#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) +#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) +#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) +#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) +#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) +#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) + +#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) +#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) +#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) +#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) + +#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) +#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) +#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) +#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) + +#define _mm_round_ps(X, M) __extension__ ({ \ + __m128 __X = (X); \ + (__m128) __builtin_ia32_roundps((__v4sf)__X, (M)); }) + +#define _mm_round_ss(X, Y, M) __extension__ ({ \ + __m128 __X = (X); \ + __m128 __Y = (Y); \ + (__m128) __builtin_ia32_roundss((__v4sf)__X, (__v4sf)__Y, (M)); }) + +#define _mm_round_pd(X, M) __extension__ ({ \ + __m128d __X = (X); \ + (__m128d) __builtin_ia32_roundpd((__v2df)__X, (M)); }) + +#define _mm_round_sd(X, Y, M) __extension__ ({ \ + __m128d __X = (X); \ + __m128d __Y = (Y); \ + (__m128d) __builtin_ia32_roundsd((__v2df)__X, (__v2df)__Y, (M)); }) + +/* SSE4 Packed Blending Intrinsics. 
*/ +#define _mm_blend_pd(V1, V2, M) __extension__ ({ \ + __m128d __V1 = (V1); \ + __m128d __V2 = (V2); \ + (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, (M)); }) + +#define _mm_blend_ps(V1, V2, M) __extension__ ({ \ + __m128 __V1 = (V1); \ + __m128 __V2 = (V2); \ + (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, (M)); }) + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) +{ + return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, + (__v2df)__M); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) +{ + return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, + (__v4sf)__M); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) +{ + return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, + (__v16qi)__M); +} + +#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ + __m128i __V1 = (V1); \ + __m128i __V2 = (V2); \ + (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, (M)); }) + +/* SSE4 Dword Multiply Instructions. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mullo_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) ((__v4si)__V1 * (__v4si)__V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mul_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); +} + +/* SSE4 Floating Point Dot Product Instructions. 
*/ +#define _mm_dp_ps(X, Y, M) __extension__ ({ \ + __m128 __X = (X); \ + __m128 __Y = (Y); \ + (__m128) __builtin_ia32_dpps((__v4sf)__X, (__v4sf)__Y, (M)); }) + +#define _mm_dp_pd(X, Y, M) __extension__ ({\ + __m128d __X = (X); \ + __m128d __Y = (Y); \ + (__m128d) __builtin_ia32_dppd((__v2df)__X, (__v2df)__Y, (M)); }) + +/* SSE4 Streaming Load Hint Instruction. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_stream_load_si128 (__m128i *__V) +{ + return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V); +} + +/* SSE4 Packed Integer Min/Max Instructions. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epi8 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epi8 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epu16 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epu16 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epi32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_min_epu32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, 
(__v4si) __V2); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_max_epu32 (__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); +} + +/* SSE4 Insertion and Extraction from XMM Register Instructions. */ +#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) +#define _mm_extract_ps(X, N) (__extension__ \ + ({ union { int __i; float __f; } __t; \ + __v4sf __a = (__v4sf)(X); \ + __t.__f = __a[(N) & 3]; \ + __t.__i;})) + +/* Miscellaneous insert and extract macros. */ +/* Extract a single-precision float from X at index N into D. */ +#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \ + (D) = __a[N]; })) + +/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create + an index suitable for _mm_insert_ps. */ +#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) + +/* Extract a float from X at index N into the first index of the return. */ +#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \ + _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) + +/* Insert int into packed integer array at index. */ +#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ + __a[(N) & 15] = (I); \ + __a;})) +#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ + __a[(N) & 3] = (I); \ + __a;})) +#ifdef __x86_64__ +#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ + __a[(N) & 1] = (I); \ + __a;})) +#endif /* __x86_64__ */ + +/* Extract int from packed integer array at index. This returns the element + * as a zero extended value, so it is unsigned. 
+ */ +#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \ + (int)(unsigned char) \ + __a[(N) & 15];})) +#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \ + __a[(N) & 3];})) +#ifdef __x86_64__ +#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \ + __a[(N) & 1];})) +#endif /* __x86_64 */ + +/* SSE4 128-bit Packed Integer Comparisons. */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_testz_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_testc_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_testnzc_si128(__m128i __M, __m128i __V) +{ + return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); +} + +#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) +#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) +#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) + +/* SSE4 64-bit Packed Integer Comparisons. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) +{ + return (__m128i)((__v2di)__V1 == (__v2di)__V2); +} + +/* SSE4 Packed Integer Sign-Extension. 
*/ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi8_epi16(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi8_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi8_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi16_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi16_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepi32_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V); +} + +/* SSE4 Packed Integer Zero-Extension. 
*/ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu8_epi16(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu8_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu8_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu16_epi32(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu16_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cvtepu32_epi64(__m128i __V) +{ + return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V); +} + +/* SSE4 Pack with Unsigned Saturation. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_packus_epi32(__m128i __V1, __m128i __V2) +{ + return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); +} + +/* SSE4 Multiple Packed Sums of Absolute Difference. */ +#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \ + __m128i __X = (X); \ + __m128i __Y = (Y); \ + (__m128i) __builtin_ia32_mpsadbw128((__v16qi)__X, (__v16qi)__Y, (M)); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_minpos_epu16(__m128i __V) +{ + return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V); +} + +/* These definitions are normally in nmmintrin.h, but gcc puts them in here + so we'll do the same. */ +#ifdef __SSE4_2__ + +/* These specify the type of data that we're comparing. 
*/ +#define _SIDD_UBYTE_OPS 0x00 +#define _SIDD_UWORD_OPS 0x01 +#define _SIDD_SBYTE_OPS 0x02 +#define _SIDD_SWORD_OPS 0x03 + +/* These specify the type of comparison operation. */ +#define _SIDD_CMP_EQUAL_ANY 0x00 +#define _SIDD_CMP_RANGES 0x04 +#define _SIDD_CMP_EQUAL_EACH 0x08 +#define _SIDD_CMP_EQUAL_ORDERED 0x0c + +/* These macros specify the polarity of the operation. */ +#define _SIDD_POSITIVE_POLARITY 0x00 +#define _SIDD_NEGATIVE_POLARITY 0x10 +#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 + +/* These macros are used in _mm_cmpXstri() to specify the return. */ +#define _SIDD_LEAST_SIGNIFICANT 0x00 +#define _SIDD_MOST_SIGNIFICANT 0x40 + +/* These macros are used in _mm_cmpXstrm() to specify the return. */ +#define _SIDD_BIT_MASK 0x00 +#define _SIDD_UNIT_MASK 0x40 + +/* SSE4.2 Packed Comparison Intrinsics. */ +#define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) +#define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) + +#define _mm_cmpestrm(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestri(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) + +/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. 
*/ +#define _mm_cmpistra(A, B, M) \ + __builtin_ia32_pcmpistria128((A), (B), (M)) +#define _mm_cmpistrc(A, B, M) \ + __builtin_ia32_pcmpistric128((A), (B), (M)) +#define _mm_cmpistro(A, B, M) \ + __builtin_ia32_pcmpistrio128((A), (B), (M)) +#define _mm_cmpistrs(A, B, M) \ + __builtin_ia32_pcmpistris128((A), (B), (M)) +#define _mm_cmpistrz(A, B, M) \ + __builtin_ia32_pcmpistriz128((A), (B), (M)) + +#define _mm_cmpestra(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestrc(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestro(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestrs(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) +#define _mm_cmpestrz(A, LA, B, LB, M) \ + __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M)) + +/* SSE4.2 Compare Packed Data -- Greater Than. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) +{ + return (__m128i)((__v2di)__V1 > (__v2di)__V2); +} + +/* SSE4.2 Accumulate CRC32. 
*/ +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mm_crc32_u8(unsigned int __C, unsigned char __D) +{ + return __builtin_ia32_crc32qi(__C, __D); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mm_crc32_u16(unsigned int __C, unsigned short __D) +{ + return __builtin_ia32_crc32hi(__C, __D); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mm_crc32_u32(unsigned int __C, unsigned int __D) +{ + return __builtin_ia32_crc32si(__C, __D); +} + +#ifdef __x86_64__ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +_mm_crc32_u64(unsigned long long __C, unsigned long long __D) +{ + return __builtin_ia32_crc32di(__C, __D); +} +#endif /* __x86_64__ */ + +#ifdef __POPCNT__ +#include <popcntintrin.h> +#endif + +#endif /* __SSE4_2__ */ +#endif /* __SSE4_1__ */ + +#endif /* _SMMINTRIN_H */ diff --git a/python/clang/5.1/include/stdalign.h b/python/clang/5.1/include/stdalign.h new file mode 100644 index 00000000..3738d128 --- /dev/null +++ b/python/clang/5.1/include/stdalign.h @@ -0,0 +1,35 @@ +/*===---- stdalign.h - Standard header for alignment ------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDALIGN_H +#define __STDALIGN_H + +#ifndef __cplusplus +#define alignas _Alignas +#define alignof _Alignof +#endif + +#define __alignas_is_defined 1 +#define __alignof_is_defined 1 + +#endif /* __STDALIGN_H */ diff --git a/python/clang/5.1/include/stdarg.h b/python/clang/5.1/include/stdarg.h new file mode 100644 index 00000000..2957bf05 --- /dev/null +++ b/python/clang/5.1/include/stdarg.h @@ -0,0 +1,50 @@ +/*===---- stdarg.h - Variable argument handling ----------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDARG_H +#define __STDARG_H + +#ifndef _VA_LIST +typedef __builtin_va_list va_list; +#define _VA_LIST +#endif +#define va_start(ap, param) __builtin_va_start(ap, param) +#define va_end(ap) __builtin_va_end(ap) +#define va_arg(ap, type) __builtin_va_arg(ap, type) + +/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode + * or -ansi is not specified, since it was not part of C90. + */ +#define __va_copy(d,s) __builtin_va_copy(d,s) + +#if __STDC_VERSION__ >= 199900L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__) +#define va_copy(dest, src) __builtin_va_copy(dest, src) +#endif + +/* Hack required to make standard headers work, at least on Ubuntu */ +#define __GNUC_VA_LIST 1 +typedef __builtin_va_list __gnuc_va_list; + +#endif /* __STDARG_H */ diff --git a/python/clang/5.1/include/stdbool.h b/python/clang/5.1/include/stdbool.h new file mode 100644 index 00000000..0467893f --- /dev/null +++ b/python/clang/5.1/include/stdbool.h @@ -0,0 +1,44 @@ +/*===---- stdbool.h - Standard header for booleans -------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDBOOL_H +#define __STDBOOL_H + +/* Don't define bool, true, and false in C++, except as a GNU extension. */ +#ifndef __cplusplus +#define bool _Bool +#define true 1 +#define false 0 +#elif defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Define _Bool, bool, false, true as a GNU extension. */ +#define _Bool bool +#define bool bool +#define false false +#define true true +#endif + +#define __bool_true_false_are_defined 1 + +#endif /* __STDBOOL_H */ diff --git a/python/clang/5.1/include/stddef.h b/python/clang/5.1/include/stddef.h new file mode 100644 index 00000000..6a64d6d3 --- /dev/null +++ b/python/clang/5.1/include/stddef.h @@ -0,0 +1,102 @@ +/*===---- stddef.h - Basic type definitions --------------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDDEF_H +#define __STDDEF_H + +#if !defined(_PTRDIFF_T) || __has_feature(modules) +/* Always define ptrdiff_t when modules are available. */ +#if !__has_feature(modules) +#define _PTRDIFF_T +#endif +typedef __PTRDIFF_TYPE__ ptrdiff_t; +#endif + +#if !defined(_SIZE_T) || __has_feature(modules) +/* Always define size_t when modules are available. */ +#if !__has_feature(modules) +#define _SIZE_T +#endif +typedef __SIZE_TYPE__ size_t; +#endif + +/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is + * enabled. */ +#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ + !defined(_RSIZE_T)) || __has_feature(modules) +/* Always define rsize_t when modules are available. */ +#if !__has_feature(modules) +#define _RSIZE_T +#endif +typedef __SIZE_TYPE__ rsize_t; +#endif + +#ifndef __cplusplus +/* Always define wchar_t when modules are available. 
*/ +#if !defined(_WCHAR_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WCHAR_T +#if defined(_MSC_EXTENSIONS) +#define _WCHAR_T_DEFINED +#endif +#endif +typedef __WCHAR_TYPE__ wchar_t; +#endif +#endif + +#undef NULL +#ifdef __cplusplus +# if !defined(__MINGW32__) && !defined(_MSC_VER) +# define NULL __null +# else +# define NULL 0 +# endif +#else +# define NULL ((void*)0) +#endif + +#ifdef __cplusplus +#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) +namespace std { typedef decltype(nullptr) nullptr_t; } +using ::std::nullptr_t; +#endif +#endif + +#define offsetof(t, d) __builtin_offsetof(t, d) + +#endif /* __STDDEF_H */ + +/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use +__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ +#if defined(__need_wint_t) +/* Always define wint_t when modules are available. */ +#if !defined(_WINT_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WINT_T +#endif +typedef __WINT_TYPE__ wint_t; +#endif +#undef __need_wint_t +#endif /* __need_wint_t */ diff --git a/python/clang/5.1/include/stdint.h b/python/clang/5.1/include/stdint.h new file mode 100644 index 00000000..11529c0c --- /dev/null +++ b/python/clang/5.1/include/stdint.h @@ -0,0 +1,708 @@ +/*===---- stdint.h - Standard header for sized integer types --------------===*\ + * + * Copyright (c) 2009 Chris Lattner + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial 
portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __CLANG_STDINT_H +#define __CLANG_STDINT_H + +/* If we're hosted, fall back to the system's stdint.h, which might have + * additional definitions. + */ +#if __STDC_HOSTED__ && \ + defined(__has_include_next) && __has_include_next(<stdint.h>) + +// C99 7.18.3 Limits of other integer types +// +// Footnote 219, 220: C++ implementations should define these macros only when +// __STDC_LIMIT_MACROS is defined before <stdint.h> is included. +// +// Footnote 222: C++ implementations should define these macros only when +// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included. +// +// C++11 [cstdint.syn]p2: +// +// The macros defined by <cstdint> are provided unconditionally. In particular, +// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in +// footnotes 219, 220, and 222 in the C standard) play no role in C++. +// +// C11 removed the problematic footnotes. +// +// Work around this inconsistency by always defining those macros in C++ mode, +// so that a C library implementation which follows the C99 standard can be +// used in C++. 
+# ifdef __cplusplus +# if !defined(__STDC_LIMIT_MACROS) +# define __STDC_LIMIT_MACROS +# define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG +# endif +# if !defined(__STDC_CONSTANT_MACROS) +# define __STDC_CONSTANT_MACROS +# define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG +# endif +# endif + +# include_next <stdint.h> + +# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG +# undef __STDC_LIMIT_MACROS +# undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG +# endif +# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG +# undef __STDC_CONSTANT_MACROS +# undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG +# endif + +#else + +/* C99 7.18.1.1 Exact-width integer types. + * C99 7.18.1.2 Minimum-width integer types. + * C99 7.18.1.3 Fastest minimum-width integer types. + * + * The standard requires that exact-width type be defined for 8-, 16-, 32-, and + * 64-bit types if they are implemented. Other exact width types are optional. + * This implementation defines an exact-width type for every integer width + * that is represented in the standard integer types. + * + * The standard also requires minimum-width types be defined for 8-, 16-, 32-, + * and 64-bit widths regardless of whether there are corresponding exact-width + * types. + * + * To accommodate targets that are missing types that are exactly 8, 16, 32, or + * 64 bits wide, this implementation takes an approach of cascading + * redefinitions, redefining __int_leastN_t to successively smaller exact-width + * types. It is therefore important that the types are defined in order of + * descending widths. + * + * We currently assume that the minimum-width types and the fastest + * minimum-width types are the same. This is allowed by the standard, but is + * suboptimal. + * + * In violation of the standard, some targets do not implement a type that is + * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit). 
+ * To accommodate these targets, a required minimum-width type is only + * defined if there exists an exact-width type of equal or greater width. + */ + +#ifdef __INT64_TYPE__ +# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/ +typedef signed __INT64_TYPE__ int64_t; +# endif /* __int8_t_defined */ +typedef unsigned __INT64_TYPE__ uint64_t; +# define __int_least64_t int64_t +# define __uint_least64_t uint64_t +# define __int_least32_t int64_t +# define __uint_least32_t uint64_t +# define __int_least16_t int64_t +# define __uint_least16_t uint64_t +# define __int_least8_t int64_t +# define __uint_least8_t uint64_t +#endif /* __INT64_TYPE__ */ + +#ifdef __int_least64_t +typedef __int_least64_t int_least64_t; +typedef __uint_least64_t uint_least64_t; +typedef __int_least64_t int_fast64_t; +typedef __uint_least64_t uint_fast64_t; +#endif /* __int_least64_t */ + +#ifdef __INT56_TYPE__ +typedef signed __INT56_TYPE__ int56_t; +typedef unsigned __INT56_TYPE__ uint56_t; +typedef int56_t int_least56_t; +typedef uint56_t uint_least56_t; +typedef int56_t int_fast56_t; +typedef uint56_t uint_fast56_t; +# define __int_least32_t int56_t +# define __uint_least32_t uint56_t +# define __int_least16_t int56_t +# define __uint_least16_t uint56_t +# define __int_least8_t int56_t +# define __uint_least8_t uint56_t +#endif /* __INT56_TYPE__ */ + + +#ifdef __INT48_TYPE__ +typedef signed __INT48_TYPE__ int48_t; +typedef unsigned __INT48_TYPE__ uint48_t; +typedef int48_t int_least48_t; +typedef uint48_t uint_least48_t; +typedef int48_t int_fast48_t; +typedef uint48_t uint_fast48_t; +# define __int_least32_t int48_t +# define __uint_least32_t uint48_t +# define __int_least16_t int48_t +# define __uint_least16_t uint48_t +# define __int_least8_t int48_t +# define __uint_least8_t uint48_t +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +typedef signed __INT40_TYPE__ int40_t; +typedef unsigned __INT40_TYPE__ uint40_t; +typedef int40_t int_least40_t; +typedef uint40_t 
uint_least40_t; +typedef int40_t int_fast40_t; +typedef uint40_t uint_fast40_t; +# define __int_least32_t int40_t +# define __uint_least32_t uint40_t +# define __int_least16_t int40_t +# define __uint_least16_t uint40_t +# define __int_least8_t int40_t +# define __uint_least8_t uint40_t +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ + +# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/ +typedef signed __INT32_TYPE__ int32_t; +# endif /* __int8_t_defined */ + +# ifndef __uint32_t_defined /* more glibc compatibility */ +# define __uint32_t_defined +typedef unsigned __INT32_TYPE__ uint32_t; +# endif /* __uint32_t_defined */ + +# define __int_least32_t int32_t +# define __uint_least32_t uint32_t +# define __int_least16_t int32_t +# define __uint_least16_t uint32_t +# define __int_least8_t int32_t +# define __uint_least8_t uint32_t +#endif /* __INT32_TYPE__ */ + +#ifdef __int_least32_t +typedef __int_least32_t int_least32_t; +typedef __uint_least32_t uint_least32_t; +typedef __int_least32_t int_fast32_t; +typedef __uint_least32_t uint_fast32_t; +#endif /* __int_least32_t */ + +#ifdef __INT24_TYPE__ +typedef signed __INT24_TYPE__ int24_t; +typedef unsigned __INT24_TYPE__ uint24_t; +typedef int24_t int_least24_t; +typedef uint24_t uint_least24_t; +typedef int24_t int_fast24_t; +typedef uint24_t uint_fast24_t; +# define __int_least16_t int24_t +# define __uint_least16_t uint24_t +# define __int_least8_t int24_t +# define __uint_least8_t uint24_t +#endif /* __INT24_TYPE__ */ + +#ifdef __INT16_TYPE__ +#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/ +typedef signed __INT16_TYPE__ int16_t; +#endif /* __int8_t_defined */ +typedef unsigned __INT16_TYPE__ uint16_t; +# define __int_least16_t int16_t +# define __uint_least16_t uint16_t +# define __int_least8_t int16_t +# define __uint_least8_t uint16_t +#endif /* __INT16_TYPE__ */ + +#ifdef __int_least16_t +typedef __int_least16_t int_least16_t; +typedef __uint_least16_t 
uint_least16_t; +typedef __int_least16_t int_fast16_t; +typedef __uint_least16_t uint_fast16_t; +#endif /* __int_least16_t */ + + +#ifdef __INT8_TYPE__ +#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/ +typedef signed __INT8_TYPE__ int8_t; +#endif /* __int8_t_defined */ +typedef unsigned __INT8_TYPE__ uint8_t; +# define __int_least8_t int8_t +# define __uint_least8_t uint8_t +#endif /* __INT8_TYPE__ */ + +#ifdef __int_least8_t +typedef __int_least8_t int_least8_t; +typedef __uint_least8_t uint_least8_t; +typedef __int_least8_t int_fast8_t; +typedef __uint_least8_t uint_fast8_t; +#endif /* __int_least8_t */ + +/* prevent glibc sys/types.h from defining conflicting types */ +#ifndef __int8_t_defined +# define __int8_t_defined +#endif /* __int8_t_defined */ + +/* C99 7.18.1.4 Integer types capable of holding object pointers. + */ +#define __stdint_join3(a,b,c) a ## b ## c + +#define __intn_t(n) __stdint_join3( int, n, _t) +#define __uintn_t(n) __stdint_join3(uint, n, _t) + +#ifndef _INTPTR_T +#ifndef __intptr_t_defined +typedef __intn_t(__INTPTR_WIDTH__) intptr_t; +#define __intptr_t_defined +#define _INTPTR_T +#endif +#endif + +#ifndef _UINTPTR_T +typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t; +#define _UINTPTR_T +#endif + +/* C99 7.18.1.5 Greatest-width integer types. + */ +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; + +/* C99 7.18.4 Macros for minimum-width integer constants. + * + * The standard requires that integer constant macros be defined for all the + * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width + * types are required, the corresponding integer constant macros are defined + * here. This implementation also defines minimum-width types for every other + * integer width that the target implements, so corresponding macros are + * defined below, too. + * + * These macros are defined using the same successive-shrinking approach as + * the type definitions above. 
It is likewise important that macros are defined + * in order of descending width. + * + * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the + * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). + */ + +#define __int_c_join(a, b) a ## b +#define __int_c(v, suffix) __int_c_join(v, suffix) +#define __uint_c(v, suffix) __int_c_join(v##U, suffix) + + +#ifdef __INT64_TYPE__ +# ifdef __INT64_C_SUFFIX__ +# define __int64_c_suffix __INT64_C_SUFFIX__ +# define __int32_c_suffix __INT64_C_SUFFIX__ +# define __int16_c_suffix __INT64_C_SUFFIX__ +# define __int8_c_suffix __INT64_C_SUFFIX__ +# else +# undef __int64_c_suffix +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT64_C_SUFFIX__ */ +#endif /* __INT64_TYPE__ */ + +#ifdef __int_least64_t +# ifdef __int64_c_suffix +# define INT64_C(v) __int_c(v, __int64_c_suffix) +# define UINT64_C(v) __uint_c(v, __int64_c_suffix) +# else +# define INT64_C(v) v +# define UINT64_C(v) v ## U +# endif /* __int64_c_suffix */ +#endif /* __int_least64_t */ + + +#ifdef __INT56_TYPE__ +# ifdef __INT56_C_SUFFIX__ +# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__) +# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__) +# define __int32_c_suffix __INT56_C_SUFFIX__ +# define __int16_c_suffix __INT56_C_SUFFIX__ +# define __int8_c_suffix __INT56_C_SUFFIX__ +# else +# define INT56_C(v) v +# define UINT56_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT56_C_SUFFIX__ */ +#endif /* __INT56_TYPE__ */ + + +#ifdef __INT48_TYPE__ +# ifdef __INT48_C_SUFFIX__ +# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__) +# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__) +# define __int32_c_suffix __INT48_C_SUFFIX__ +# define __int16_c_suffix __INT48_C_SUFFIX__ +# define __int8_c_suffix __INT48_C_SUFFIX__ +# else +# define INT48_C(v) v +# define UINT48_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef 
__int8_c_suffix +# endif /* __INT48_C_SUFFIX__ */ +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +# ifdef __INT40_C_SUFFIX__ +# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__) +# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__) +# define __int32_c_suffix __INT40_C_SUFFIX__ +# define __int16_c_suffix __INT40_C_SUFFIX__ +# define __int8_c_suffix __INT40_C_SUFFIX__ +# else +# define INT40_C(v) v +# define UINT40_C(v) v ## U +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT40_C_SUFFIX__ */ +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ +# ifdef __INT32_C_SUFFIX__ +# define __int32_c_suffix __INT32_C_SUFFIX__ +# define __int16_c_suffix __INT32_C_SUFFIX__ +# define __int8_c_suffix __INT32_C_SUFFIX__ +#else +# undef __int32_c_suffix +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT32_C_SUFFIX__ */ +#endif /* __INT32_TYPE__ */ + +#ifdef __int_least32_t +# ifdef __int32_c_suffix +# define INT32_C(v) __int_c(v, __int32_c_suffix) +# define UINT32_C(v) __uint_c(v, __int32_c_suffix) +# else +# define INT32_C(v) v +# define UINT32_C(v) v ## U +# endif /* __int32_c_suffix */ +#endif /* __int_least32_t */ + + +#ifdef __INT24_TYPE__ +# ifdef __INT24_C_SUFFIX__ +# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__) +# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__) +# define __int16_c_suffix __INT24_C_SUFFIX__ +# define __int8_c_suffix __INT24_C_SUFFIX__ +# else +# define INT24_C(v) v +# define UINT24_C(v) v ## U +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT24_C_SUFFIX__ */ +#endif /* __INT24_TYPE__ */ + + +#ifdef __INT16_TYPE__ +# ifdef __INT16_C_SUFFIX__ +# define __int16_c_suffix __INT16_C_SUFFIX__ +# define __int8_c_suffix __INT16_C_SUFFIX__ +#else +# undef __int16_c_suffix +# undef __int8_c_suffix +# endif /* __INT16_C_SUFFIX__ */ +#endif /* __INT16_TYPE__ */ + +#ifdef __int_least16_t +# ifdef __int16_c_suffix +# define INT16_C(v) __int_c(v, __int16_c_suffix) +# define 
UINT16_C(v) __uint_c(v, __int16_c_suffix) +# else +# define INT16_C(v) v +# define UINT16_C(v) v ## U +# endif /* __int16_c_suffix */ +#endif /* __int_least16_t */ + + +#ifdef __INT8_TYPE__ +# ifdef __INT8_C_SUFFIX__ +# define __int8_c_suffix __INT8_C_SUFFIX__ +#else +# undef __int8_c_suffix +# endif /* __INT8_C_SUFFIX__ */ +#endif /* __INT8_TYPE__ */ + +#ifdef __int_least8_t +# ifdef __int8_c_suffix +# define INT8_C(v) __int_c(v, __int8_c_suffix) +# define UINT8_C(v) __uint_c(v, __int8_c_suffix) +# else +# define INT8_C(v) v +# define UINT8_C(v) v ## U +# endif /* __int8_c_suffix */ +#endif /* __int_least8_t */ + + +/* C99 7.18.2.1 Limits of exact-width integer types. + * C99 7.18.2.2 Limits of minimum-width integer types. + * C99 7.18.2.3 Limits of fastest minimum-width integer types. + * + * The presence of limit macros is completely optional in C99. This + * implementation defines limits for all of the types (exact- and + * minimum-width) that it defines above, using the limits of the minimum-width + * type for any types that do not have exact-width representations. + * + * As in the type definitions, this section takes an approach of + * successive-shrinking to determine which limits to use for the standard (8, + * 16, 32, 64) bit widths when they don't have exact representations. It is + * therefore important that the definitions be kept in order of descending + * widths. + * + * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the + * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). 
+ */ + +#ifdef __INT64_TYPE__ +# define INT64_MAX INT64_C( 9223372036854775807) +# define INT64_MIN (-INT64_C( 9223372036854775807)-1) +# define UINT64_MAX UINT64_C(18446744073709551615) +# define __INT_LEAST64_MIN INT64_MIN +# define __INT_LEAST64_MAX INT64_MAX +# define __UINT_LEAST64_MAX UINT64_MAX +# define __INT_LEAST32_MIN INT64_MIN +# define __INT_LEAST32_MAX INT64_MAX +# define __UINT_LEAST32_MAX UINT64_MAX +# define __INT_LEAST16_MIN INT64_MIN +# define __INT_LEAST16_MAX INT64_MAX +# define __UINT_LEAST16_MAX UINT64_MAX +# define __INT_LEAST8_MIN INT64_MIN +# define __INT_LEAST8_MAX INT64_MAX +# define __UINT_LEAST8_MAX UINT64_MAX +#endif /* __INT64_TYPE__ */ + +#ifdef __INT_LEAST64_MIN +# define INT_LEAST64_MIN __INT_LEAST64_MIN +# define INT_LEAST64_MAX __INT_LEAST64_MAX +# define UINT_LEAST64_MAX __UINT_LEAST64_MAX +# define INT_FAST64_MIN __INT_LEAST64_MIN +# define INT_FAST64_MAX __INT_LEAST64_MAX +# define UINT_FAST64_MAX __UINT_LEAST64_MAX +#endif /* __INT_LEAST64_MIN */ + + +#ifdef __INT56_TYPE__ +# define INT56_MAX INT56_C(36028797018963967) +# define INT56_MIN (-INT56_C(36028797018963967)-1) +# define UINT56_MAX UINT56_C(72057594037927935) +# define INT_LEAST56_MIN INT56_MIN +# define INT_LEAST56_MAX INT56_MAX +# define UINT_LEAST56_MAX UINT56_MAX +# define INT_FAST56_MIN INT56_MIN +# define INT_FAST56_MAX INT56_MAX +# define UINT_FAST56_MAX UINT56_MAX +# define __INT_LEAST32_MIN INT56_MIN +# define __INT_LEAST32_MAX INT56_MAX +# define __UINT_LEAST32_MAX UINT56_MAX +# define __INT_LEAST16_MIN INT56_MIN +# define __INT_LEAST16_MAX INT56_MAX +# define __UINT_LEAST16_MAX UINT56_MAX +# define __INT_LEAST8_MIN INT56_MIN +# define __INT_LEAST8_MAX INT56_MAX +# define __UINT_LEAST8_MAX UINT56_MAX +#endif /* __INT56_TYPE__ */ + + +#ifdef __INT48_TYPE__ +# define INT48_MAX INT48_C(140737488355327) +# define INT48_MIN (-INT48_C(140737488355327)-1) +# define UINT48_MAX UINT48_C(281474976710655) +# define INT_LEAST48_MIN INT48_MIN +# define INT_LEAST48_MAX 
INT48_MAX +# define UINT_LEAST48_MAX UINT48_MAX +# define INT_FAST48_MIN INT48_MIN +# define INT_FAST48_MAX INT48_MAX +# define UINT_FAST48_MAX UINT48_MAX +# define __INT_LEAST32_MIN INT48_MIN +# define __INT_LEAST32_MAX INT48_MAX +# define __UINT_LEAST32_MAX UINT48_MAX +# define __INT_LEAST16_MIN INT48_MIN +# define __INT_LEAST16_MAX INT48_MAX +# define __UINT_LEAST16_MAX UINT48_MAX +# define __INT_LEAST8_MIN INT48_MIN +# define __INT_LEAST8_MAX INT48_MAX +# define __UINT_LEAST8_MAX UINT48_MAX +#endif /* __INT48_TYPE__ */ + + +#ifdef __INT40_TYPE__ +# define INT40_MAX INT40_C(549755813887) +# define INT40_MIN (-INT40_C(549755813887)-1) +# define UINT40_MAX UINT40_C(1099511627775) +# define INT_LEAST40_MIN INT40_MIN +# define INT_LEAST40_MAX INT40_MAX +# define UINT_LEAST40_MAX UINT40_MAX +# define INT_FAST40_MIN INT40_MIN +# define INT_FAST40_MAX INT40_MAX +# define UINT_FAST40_MAX UINT40_MAX +# define __INT_LEAST32_MIN INT40_MIN +# define __INT_LEAST32_MAX INT40_MAX +# define __UINT_LEAST32_MAX UINT40_MAX +# define __INT_LEAST16_MIN INT40_MIN +# define __INT_LEAST16_MAX INT40_MAX +# define __UINT_LEAST16_MAX UINT40_MAX +# define __INT_LEAST8_MIN INT40_MIN +# define __INT_LEAST8_MAX INT40_MAX +# define __UINT_LEAST8_MAX UINT40_MAX +#endif /* __INT40_TYPE__ */ + + +#ifdef __INT32_TYPE__ +# define INT32_MAX INT32_C(2147483647) +# define INT32_MIN (-INT32_C(2147483647)-1) +# define UINT32_MAX UINT32_C(4294967295) +# define __INT_LEAST32_MIN INT32_MIN +# define __INT_LEAST32_MAX INT32_MAX +# define __UINT_LEAST32_MAX UINT32_MAX +# define __INT_LEAST16_MIN INT32_MIN +# define __INT_LEAST16_MAX INT32_MAX +# define __UINT_LEAST16_MAX UINT32_MAX +# define __INT_LEAST8_MIN INT32_MIN +# define __INT_LEAST8_MAX INT32_MAX +# define __UINT_LEAST8_MAX UINT32_MAX +#endif /* __INT32_TYPE__ */ + +#ifdef __INT_LEAST32_MIN +# define INT_LEAST32_MIN __INT_LEAST32_MIN +# define INT_LEAST32_MAX __INT_LEAST32_MAX +# define UINT_LEAST32_MAX __UINT_LEAST32_MAX +# define INT_FAST32_MIN 
__INT_LEAST32_MIN +# define INT_FAST32_MAX __INT_LEAST32_MAX +# define UINT_FAST32_MAX __UINT_LEAST32_MAX +#endif /* __INT_LEAST32_MIN */ + + +#ifdef __INT24_TYPE__ +# define INT24_MAX INT24_C(8388607) +# define INT24_MIN (-INT24_C(8388607)-1) +# define UINT24_MAX UINT24_C(16777215) +# define INT_LEAST24_MIN INT24_MIN +# define INT_LEAST24_MAX INT24_MAX +# define UINT_LEAST24_MAX UINT24_MAX +# define INT_FAST24_MIN INT24_MIN +# define INT_FAST24_MAX INT24_MAX +# define UINT_FAST24_MAX UINT24_MAX +# define __INT_LEAST16_MIN INT24_MIN +# define __INT_LEAST16_MAX INT24_MAX +# define __UINT_LEAST16_MAX UINT24_MAX +# define __INT_LEAST8_MIN INT24_MIN +# define __INT_LEAST8_MAX INT24_MAX +# define __UINT_LEAST8_MAX UINT24_MAX +#endif /* __INT24_TYPE__ */ + + +#ifdef __INT16_TYPE__ +#define INT16_MAX INT16_C(32767) +#define INT16_MIN (-INT16_C(32767)-1) +#define UINT16_MAX UINT16_C(65535) +# define __INT_LEAST16_MIN INT16_MIN +# define __INT_LEAST16_MAX INT16_MAX +# define __UINT_LEAST16_MAX UINT16_MAX +# define __INT_LEAST8_MIN INT16_MIN +# define __INT_LEAST8_MAX INT16_MAX +# define __UINT_LEAST8_MAX UINT16_MAX +#endif /* __INT16_TYPE__ */ + +#ifdef __INT_LEAST16_MIN +# define INT_LEAST16_MIN __INT_LEAST16_MIN +# define INT_LEAST16_MAX __INT_LEAST16_MAX +# define UINT_LEAST16_MAX __UINT_LEAST16_MAX +# define INT_FAST16_MIN __INT_LEAST16_MIN +# define INT_FAST16_MAX __INT_LEAST16_MAX +# define UINT_FAST16_MAX __UINT_LEAST16_MAX +#endif /* __INT_LEAST16_MIN */ + + +#ifdef __INT8_TYPE__ +# define INT8_MAX INT8_C(127) +# define INT8_MIN (-INT8_C(127)-1) +# define UINT8_MAX UINT8_C(255) +# define __INT_LEAST8_MIN INT8_MIN +# define __INT_LEAST8_MAX INT8_MAX +# define __UINT_LEAST8_MAX UINT8_MAX +#endif /* __INT8_TYPE__ */ + +#ifdef __INT_LEAST8_MIN +# define INT_LEAST8_MIN __INT_LEAST8_MIN +# define INT_LEAST8_MAX __INT_LEAST8_MAX +# define UINT_LEAST8_MAX __UINT_LEAST8_MAX +# define INT_FAST8_MIN __INT_LEAST8_MIN +# define INT_FAST8_MAX __INT_LEAST8_MAX +# define 
UINT_FAST8_MAX __UINT_LEAST8_MAX +#endif /* __INT_LEAST8_MIN */ + +/* Some utility macros */ +#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN) +#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX) +#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX) +#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v)) +#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v)) + +/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */ +/* C99 7.18.3 Limits of other integer types. */ + +#define INTPTR_MIN __INTN_MIN(__INTPTR_WIDTH__) +#define INTPTR_MAX __INTN_MAX(__INTPTR_WIDTH__) +#define UINTPTR_MAX __UINTN_MAX(__INTPTR_WIDTH__) +#define PTRDIFF_MIN __INTN_MIN(__PTRDIFF_WIDTH__) +#define PTRDIFF_MAX __INTN_MAX(__PTRDIFF_WIDTH__) +#define SIZE_MAX __UINTN_MAX(__SIZE_WIDTH__) + +/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__ + * is enabled. */ +#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 +#define RSIZE_MAX (SIZE_MAX >> 1) +#endif + +/* C99 7.18.2.5 Limits of greatest-width integer types. */ +#define INTMAX_MIN __INTN_MIN(__INTMAX_WIDTH__) +#define INTMAX_MAX __INTN_MAX(__INTMAX_WIDTH__) +#define UINTMAX_MAX __UINTN_MAX(__INTMAX_WIDTH__) + +/* C99 7.18.3 Limits of other integer types. */ +#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__) +#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__) +#ifdef __WINT_UNSIGNED__ +# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0) +# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__) +#else +# define WINT_MIN __INTN_MIN(__WINT_WIDTH__) +# define WINT_MAX __INTN_MAX(__WINT_WIDTH__) +#endif + +#ifndef WCHAR_MAX +# define WCHAR_MAX __WCHAR_MAX__ +#endif +#ifndef WCHAR_MIN +# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__) +# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__) +# else +# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0) +# endif +#endif + +/* 7.18.4.2 Macros for greatest-width integer constants. 
*/ +#define INTMAX_C(v) __INTN_C(__INTMAX_WIDTH__, v) +#define UINTMAX_C(v) __UINTN_C(__INTMAX_WIDTH__, v) + +#endif /* __STDC_HOSTED__ */ +#endif /* __CLANG_STDINT_H */ diff --git a/python/clang/5.1/include/stdnoreturn.h b/python/clang/5.1/include/stdnoreturn.h new file mode 100644 index 00000000..a7a301d7 --- /dev/null +++ b/python/clang/5.1/include/stdnoreturn.h @@ -0,0 +1,30 @@ +/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDNORETURN_H +#define __STDNORETURN_H + +#define noreturn _Noreturn +#define __noreturn_is_defined 1 + +#endif /* __STDNORETURN_H */ diff --git a/python/clang/5.1/include/tbmintrin.h b/python/clang/5.1/include/tbmintrin.h new file mode 100644 index 00000000..f95e34fb --- /dev/null +++ b/python/clang/5.1/include/tbmintrin.h @@ -0,0 +1,158 @@ +/*===---- tbmintrin.h - TBM intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __TBM__ +#error "TBM instruction set is not enabled" +#endif + +#ifndef __X86INTRIN_H +#error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead." 
+#endif + +#ifndef __TBMINTRIN_H +#define __TBMINTRIN_H + +#define __bextri_u32(a, b) (__builtin_ia32_bextri_u32((a), (b))) + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcfill_u32(unsigned int a) +{ + return a & (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blci_u32(unsigned int a) +{ + return a | ~(a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcic_u32(unsigned int a) +{ + return ~a & (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcmsk_u32(unsigned int a) +{ + return a ^ (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blcs_u32(unsigned int a) +{ + return a | (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blsfill_u32(unsigned int a) +{ + return a | (a - 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__blsic_u32(unsigned int a) +{ + return ~a | (a - 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__t1mskc_u32(unsigned int a) +{ + return ~a | (a + 1); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__tzmsk_u32(unsigned int a) +{ + return ~a & (a - 1); +} + +#ifdef __x86_64__ +#define __bextri_u64(a, b) (__builtin_ia32_bextri_u64((a), (int)(b))) + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blcfill_u64(unsigned long long a) +{ + return a & (a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blci_u64(unsigned long long a) +{ + return a | ~(a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blcic_u64(unsigned long long a) +{ + return ~a & (a + 1); +} + +static __inline__ unsigned long long 
__attribute__((__always_inline__, + __nodebug__)) +__blcmsk_u64(unsigned long long a) +{ + return a ^ (a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blcs_u64(unsigned long long a) +{ + return a | (a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blsfill_u64(unsigned long long a) +{ + return a | (a - 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__blsic_u64(unsigned long long a) +{ + return ~a | (a - 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__t1mskc_u64(unsigned long long a) +{ + return ~a | (a + 1); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, + __nodebug__)) +__tzmsk_u64(unsigned long long a) +{ + return ~a & (a - 1); +} +#endif + +#endif /* __TBMINTRIN_H */ diff --git a/python/clang/5.1/include/tgmath.h b/python/clang/5.1/include/tgmath.h new file mode 100644 index 00000000..a48e267e --- /dev/null +++ b/python/clang/5.1/include/tgmath.h @@ -0,0 +1,1374 @@ +/*===---- tgmath.h - Standard header for type generic math ----------------===*\ + * + * Copyright (c) 2009 Howard Hinnant + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * +\*===----------------------------------------------------------------------===*/ + +#ifndef __TGMATH_H +#define __TGMATH_H + +/* C99 7.22 Type-generic math <tgmath.h>. */ +#include <math.h> + +/* C++ handles type genericity with overloading in math.h. */ +#ifndef __cplusplus +#include <complex.h> + +#define _TG_ATTRSp __attribute__((__overloadable__)) +#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__)) + +// promotion + +typedef void _Argument_type_is_not_arithmetic; +static _Argument_type_is_not_arithmetic __tg_promote(...) 
+ __attribute__((__unavailable__,__overloadable__)); +static double _TG_ATTRSp __tg_promote(int); +static double _TG_ATTRSp __tg_promote(unsigned int); +static double _TG_ATTRSp __tg_promote(long); +static double _TG_ATTRSp __tg_promote(unsigned long); +static double _TG_ATTRSp __tg_promote(long long); +static double _TG_ATTRSp __tg_promote(unsigned long long); +static float _TG_ATTRSp __tg_promote(float); +static double _TG_ATTRSp __tg_promote(double); +static long double _TG_ATTRSp __tg_promote(long double); +static float _Complex _TG_ATTRSp __tg_promote(float _Complex); +static double _Complex _TG_ATTRSp __tg_promote(double _Complex); +static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex); + +#define __tg_promote1(__x) (__typeof__(__tg_promote(__x))) +#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \ + __tg_promote(__y))) +#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \ + __tg_promote(__y) + \ + __tg_promote(__z))) + +// acos + +static float + _TG_ATTRS + __tg_acos(float __x) {return acosf(__x);} + +static double + _TG_ATTRS + __tg_acos(double __x) {return acos(__x);} + +static long double + _TG_ATTRS + __tg_acos(long double __x) {return acosl(__x);} + +static float _Complex + _TG_ATTRS + __tg_acos(float _Complex __x) {return cacosf(__x);} + +static double _Complex + _TG_ATTRS + __tg_acos(double _Complex __x) {return cacos(__x);} + +static long double _Complex + _TG_ATTRS + __tg_acos(long double _Complex __x) {return cacosl(__x);} + +#undef acos +#define acos(__x) __tg_acos(__tg_promote1((__x))(__x)) + +// asin + +static float + _TG_ATTRS + __tg_asin(float __x) {return asinf(__x);} + +static double + _TG_ATTRS + __tg_asin(double __x) {return asin(__x);} + +static long double + _TG_ATTRS + __tg_asin(long double __x) {return asinl(__x);} + +static float _Complex + _TG_ATTRS + __tg_asin(float _Complex __x) {return casinf(__x);} + +static double _Complex + _TG_ATTRS + __tg_asin(double _Complex __x) {return 
casin(__x);} + +static long double _Complex + _TG_ATTRS + __tg_asin(long double _Complex __x) {return casinl(__x);} + +#undef asin +#define asin(__x) __tg_asin(__tg_promote1((__x))(__x)) + +// atan + +static float + _TG_ATTRS + __tg_atan(float __x) {return atanf(__x);} + +static double + _TG_ATTRS + __tg_atan(double __x) {return atan(__x);} + +static long double + _TG_ATTRS + __tg_atan(long double __x) {return atanl(__x);} + +static float _Complex + _TG_ATTRS + __tg_atan(float _Complex __x) {return catanf(__x);} + +static double _Complex + _TG_ATTRS + __tg_atan(double _Complex __x) {return catan(__x);} + +static long double _Complex + _TG_ATTRS + __tg_atan(long double _Complex __x) {return catanl(__x);} + +#undef atan +#define atan(__x) __tg_atan(__tg_promote1((__x))(__x)) + +// acosh + +static float + _TG_ATTRS + __tg_acosh(float __x) {return acoshf(__x);} + +static double + _TG_ATTRS + __tg_acosh(double __x) {return acosh(__x);} + +static long double + _TG_ATTRS + __tg_acosh(long double __x) {return acoshl(__x);} + +static float _Complex + _TG_ATTRS + __tg_acosh(float _Complex __x) {return cacoshf(__x);} + +static double _Complex + _TG_ATTRS + __tg_acosh(double _Complex __x) {return cacosh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_acosh(long double _Complex __x) {return cacoshl(__x);} + +#undef acosh +#define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x)) + +// asinh + +static float + _TG_ATTRS + __tg_asinh(float __x) {return asinhf(__x);} + +static double + _TG_ATTRS + __tg_asinh(double __x) {return asinh(__x);} + +static long double + _TG_ATTRS + __tg_asinh(long double __x) {return asinhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_asinh(float _Complex __x) {return casinhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_asinh(double _Complex __x) {return casinh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_asinh(long double _Complex __x) {return casinhl(__x);} + +#undef asinh +#define asinh(__x) 
__tg_asinh(__tg_promote1((__x))(__x)) + +// atanh + +static float + _TG_ATTRS + __tg_atanh(float __x) {return atanhf(__x);} + +static double + _TG_ATTRS + __tg_atanh(double __x) {return atanh(__x);} + +static long double + _TG_ATTRS + __tg_atanh(long double __x) {return atanhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_atanh(float _Complex __x) {return catanhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_atanh(double _Complex __x) {return catanh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_atanh(long double _Complex __x) {return catanhl(__x);} + +#undef atanh +#define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x)) + +// cos + +static float + _TG_ATTRS + __tg_cos(float __x) {return cosf(__x);} + +static double + _TG_ATTRS + __tg_cos(double __x) {return cos(__x);} + +static long double + _TG_ATTRS + __tg_cos(long double __x) {return cosl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cos(float _Complex __x) {return ccosf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cos(double _Complex __x) {return ccos(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cos(long double _Complex __x) {return ccosl(__x);} + +#undef cos +#define cos(__x) __tg_cos(__tg_promote1((__x))(__x)) + +// sin + +static float + _TG_ATTRS + __tg_sin(float __x) {return sinf(__x);} + +static double + _TG_ATTRS + __tg_sin(double __x) {return sin(__x);} + +static long double + _TG_ATTRS + __tg_sin(long double __x) {return sinl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sin(float _Complex __x) {return csinf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sin(double _Complex __x) {return csin(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sin(long double _Complex __x) {return csinl(__x);} + +#undef sin +#define sin(__x) __tg_sin(__tg_promote1((__x))(__x)) + +// tan + +static float + _TG_ATTRS + __tg_tan(float __x) {return tanf(__x);} + +static double + _TG_ATTRS + __tg_tan(double __x) {return tan(__x);} + +static long double + _TG_ATTRS + 
__tg_tan(long double __x) {return tanl(__x);} + +static float _Complex + _TG_ATTRS + __tg_tan(float _Complex __x) {return ctanf(__x);} + +static double _Complex + _TG_ATTRS + __tg_tan(double _Complex __x) {return ctan(__x);} + +static long double _Complex + _TG_ATTRS + __tg_tan(long double _Complex __x) {return ctanl(__x);} + +#undef tan +#define tan(__x) __tg_tan(__tg_promote1((__x))(__x)) + +// cosh + +static float + _TG_ATTRS + __tg_cosh(float __x) {return coshf(__x);} + +static double + _TG_ATTRS + __tg_cosh(double __x) {return cosh(__x);} + +static long double + _TG_ATTRS + __tg_cosh(long double __x) {return coshl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cosh(float _Complex __x) {return ccoshf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cosh(double _Complex __x) {return ccosh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cosh(long double _Complex __x) {return ccoshl(__x);} + +#undef cosh +#define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x)) + +// sinh + +static float + _TG_ATTRS + __tg_sinh(float __x) {return sinhf(__x);} + +static double + _TG_ATTRS + __tg_sinh(double __x) {return sinh(__x);} + +static long double + _TG_ATTRS + __tg_sinh(long double __x) {return sinhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sinh(float _Complex __x) {return csinhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sinh(double _Complex __x) {return csinh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sinh(long double _Complex __x) {return csinhl(__x);} + +#undef sinh +#define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x)) + +// tanh + +static float + _TG_ATTRS + __tg_tanh(float __x) {return tanhf(__x);} + +static double + _TG_ATTRS + __tg_tanh(double __x) {return tanh(__x);} + +static long double + _TG_ATTRS + __tg_tanh(long double __x) {return tanhl(__x);} + +static float _Complex + _TG_ATTRS + __tg_tanh(float _Complex __x) {return ctanhf(__x);} + +static double _Complex + _TG_ATTRS + __tg_tanh(double _Complex __x) {return 
ctanh(__x);} + +static long double _Complex + _TG_ATTRS + __tg_tanh(long double _Complex __x) {return ctanhl(__x);} + +#undef tanh +#define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x)) + +// exp + +static float + _TG_ATTRS + __tg_exp(float __x) {return expf(__x);} + +static double + _TG_ATTRS + __tg_exp(double __x) {return exp(__x);} + +static long double + _TG_ATTRS + __tg_exp(long double __x) {return expl(__x);} + +static float _Complex + _TG_ATTRS + __tg_exp(float _Complex __x) {return cexpf(__x);} + +static double _Complex + _TG_ATTRS + __tg_exp(double _Complex __x) {return cexp(__x);} + +static long double _Complex + _TG_ATTRS + __tg_exp(long double _Complex __x) {return cexpl(__x);} + +#undef exp +#define exp(__x) __tg_exp(__tg_promote1((__x))(__x)) + +// log + +static float + _TG_ATTRS + __tg_log(float __x) {return logf(__x);} + +static double + _TG_ATTRS + __tg_log(double __x) {return log(__x);} + +static long double + _TG_ATTRS + __tg_log(long double __x) {return logl(__x);} + +static float _Complex + _TG_ATTRS + __tg_log(float _Complex __x) {return clogf(__x);} + +static double _Complex + _TG_ATTRS + __tg_log(double _Complex __x) {return clog(__x);} + +static long double _Complex + _TG_ATTRS + __tg_log(long double _Complex __x) {return clogl(__x);} + +#undef log +#define log(__x) __tg_log(__tg_promote1((__x))(__x)) + +// pow + +static float + _TG_ATTRS + __tg_pow(float __x, float __y) {return powf(__x, __y);} + +static double + _TG_ATTRS + __tg_pow(double __x, double __y) {return pow(__x, __y);} + +static long double + _TG_ATTRS + __tg_pow(long double __x, long double __y) {return powl(__x, __y);} + +static float _Complex + _TG_ATTRS + __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);} + +static double _Complex + _TG_ATTRS + __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);} + +static long double _Complex + _TG_ATTRS + __tg_pow(long double _Complex __x, long double _Complex __y) + {return cpowl(__x, 
__y);} + +#undef pow +#define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// sqrt + +static float + _TG_ATTRS + __tg_sqrt(float __x) {return sqrtf(__x);} + +static double + _TG_ATTRS + __tg_sqrt(double __x) {return sqrt(__x);} + +static long double + _TG_ATTRS + __tg_sqrt(long double __x) {return sqrtl(__x);} + +static float _Complex + _TG_ATTRS + __tg_sqrt(float _Complex __x) {return csqrtf(__x);} + +static double _Complex + _TG_ATTRS + __tg_sqrt(double _Complex __x) {return csqrt(__x);} + +static long double _Complex + _TG_ATTRS + __tg_sqrt(long double _Complex __x) {return csqrtl(__x);} + +#undef sqrt +#define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x)) + +// fabs + +static float + _TG_ATTRS + __tg_fabs(float __x) {return fabsf(__x);} + +static double + _TG_ATTRS + __tg_fabs(double __x) {return fabs(__x);} + +static long double + _TG_ATTRS + __tg_fabs(long double __x) {return fabsl(__x);} + +static float + _TG_ATTRS + __tg_fabs(float _Complex __x) {return cabsf(__x);} + +static double + _TG_ATTRS + __tg_fabs(double _Complex __x) {return cabs(__x);} + +static long double + _TG_ATTRS + __tg_fabs(long double _Complex __x) {return cabsl(__x);} + +#undef fabs +#define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x)) + +// atan2 + +static float + _TG_ATTRS + __tg_atan2(float __x, float __y) {return atan2f(__x, __y);} + +static double + _TG_ATTRS + __tg_atan2(double __x, double __y) {return atan2(__x, __y);} + +static long double + _TG_ATTRS + __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);} + +#undef atan2 +#define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// cbrt + +static float + _TG_ATTRS + __tg_cbrt(float __x) {return cbrtf(__x);} + +static double + _TG_ATTRS + __tg_cbrt(double __x) {return cbrt(__x);} + +static long double + _TG_ATTRS + __tg_cbrt(long double __x) {return cbrtl(__x);} + +#undef cbrt +#define cbrt(__x) 
__tg_cbrt(__tg_promote1((__x))(__x)) + +// ceil + +static float + _TG_ATTRS + __tg_ceil(float __x) {return ceilf(__x);} + +static double + _TG_ATTRS + __tg_ceil(double __x) {return ceil(__x);} + +static long double + _TG_ATTRS + __tg_ceil(long double __x) {return ceill(__x);} + +#undef ceil +#define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x)) + +// copysign + +static float + _TG_ATTRS + __tg_copysign(float __x, float __y) {return copysignf(__x, __y);} + +static double + _TG_ATTRS + __tg_copysign(double __x, double __y) {return copysign(__x, __y);} + +static long double + _TG_ATTRS + __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);} + +#undef copysign +#define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// erf + +static float + _TG_ATTRS + __tg_erf(float __x) {return erff(__x);} + +static double + _TG_ATTRS + __tg_erf(double __x) {return erf(__x);} + +static long double + _TG_ATTRS + __tg_erf(long double __x) {return erfl(__x);} + +#undef erf +#define erf(__x) __tg_erf(__tg_promote1((__x))(__x)) + +// erfc + +static float + _TG_ATTRS + __tg_erfc(float __x) {return erfcf(__x);} + +static double + _TG_ATTRS + __tg_erfc(double __x) {return erfc(__x);} + +static long double + _TG_ATTRS + __tg_erfc(long double __x) {return erfcl(__x);} + +#undef erfc +#define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x)) + +// exp2 + +static float + _TG_ATTRS + __tg_exp2(float __x) {return exp2f(__x);} + +static double + _TG_ATTRS + __tg_exp2(double __x) {return exp2(__x);} + +static long double + _TG_ATTRS + __tg_exp2(long double __x) {return exp2l(__x);} + +#undef exp2 +#define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x)) + +// expm1 + +static float + _TG_ATTRS + __tg_expm1(float __x) {return expm1f(__x);} + +static double + _TG_ATTRS + __tg_expm1(double __x) {return expm1(__x);} + +static long double + _TG_ATTRS + __tg_expm1(long double __x) {return expm1l(__x);} + +#undef expm1 +#define 
expm1(__x) __tg_expm1(__tg_promote1((__x))(__x)) + +// fdim + +static float + _TG_ATTRS + __tg_fdim(float __x, float __y) {return fdimf(__x, __y);} + +static double + _TG_ATTRS + __tg_fdim(double __x, double __y) {return fdim(__x, __y);} + +static long double + _TG_ATTRS + __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);} + +#undef fdim +#define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// floor + +static float + _TG_ATTRS + __tg_floor(float __x) {return floorf(__x);} + +static double + _TG_ATTRS + __tg_floor(double __x) {return floor(__x);} + +static long double + _TG_ATTRS + __tg_floor(long double __x) {return floorl(__x);} + +#undef floor +#define floor(__x) __tg_floor(__tg_promote1((__x))(__x)) + +// fma + +static float + _TG_ATTRS + __tg_fma(float __x, float __y, float __z) + {return fmaf(__x, __y, __z);} + +static double + _TG_ATTRS + __tg_fma(double __x, double __y, double __z) + {return fma(__x, __y, __z);} + +static long double + _TG_ATTRS + __tg_fma(long double __x,long double __y, long double __z) + {return fmal(__x, __y, __z);} + +#undef fma +#define fma(__x, __y, __z) \ + __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \ + __tg_promote3((__x), (__y), (__z))(__y), \ + __tg_promote3((__x), (__y), (__z))(__z)) + +// fmax + +static float + _TG_ATTRS + __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmax(double __x, double __y) {return fmax(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);} + +#undef fmax +#define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// fmin + +static float + _TG_ATTRS + __tg_fmin(float __x, float __y) {return fminf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmin(double __x, double __y) {return fmin(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmin(long double __x, long double 
__y) {return fminl(__x, __y);} + +#undef fmin +#define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// fmod + +static float + _TG_ATTRS + __tg_fmod(float __x, float __y) {return fmodf(__x, __y);} + +static double + _TG_ATTRS + __tg_fmod(double __x, double __y) {return fmod(__x, __y);} + +static long double + _TG_ATTRS + __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);} + +#undef fmod +#define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// frexp + +static float + _TG_ATTRS + __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);} + +static double + _TG_ATTRS + __tg_frexp(double __x, int* __y) {return frexp(__x, __y);} + +static long double + _TG_ATTRS + __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);} + +#undef frexp +#define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y) + +// hypot + +static float + _TG_ATTRS + __tg_hypot(float __x, float __y) {return hypotf(__x, __y);} + +static double + _TG_ATTRS + __tg_hypot(double __x, double __y) {return hypot(__x, __y);} + +static long double + _TG_ATTRS + __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);} + +#undef hypot +#define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// ilogb + +static int + _TG_ATTRS + __tg_ilogb(float __x) {return ilogbf(__x);} + +static int + _TG_ATTRS + __tg_ilogb(double __x) {return ilogb(__x);} + +static int + _TG_ATTRS + __tg_ilogb(long double __x) {return ilogbl(__x);} + +#undef ilogb +#define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x)) + +// ldexp + +static float + _TG_ATTRS + __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);} + +static double + _TG_ATTRS + __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);} + +static long double + _TG_ATTRS + __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);} + +#undef ldexp +#define 
ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y) + +// lgamma + +static float + _TG_ATTRS + __tg_lgamma(float __x) {return lgammaf(__x);} + +static double + _TG_ATTRS + __tg_lgamma(double __x) {return lgamma(__x);} + +static long double + _TG_ATTRS + __tg_lgamma(long double __x) {return lgammal(__x);} + +#undef lgamma +#define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x)) + +// llrint + +static long long + _TG_ATTRS + __tg_llrint(float __x) {return llrintf(__x);} + +static long long + _TG_ATTRS + __tg_llrint(double __x) {return llrint(__x);} + +static long long + _TG_ATTRS + __tg_llrint(long double __x) {return llrintl(__x);} + +#undef llrint +#define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x)) + +// llround + +static long long + _TG_ATTRS + __tg_llround(float __x) {return llroundf(__x);} + +static long long + _TG_ATTRS + __tg_llround(double __x) {return llround(__x);} + +static long long + _TG_ATTRS + __tg_llround(long double __x) {return llroundl(__x);} + +#undef llround +#define llround(__x) __tg_llround(__tg_promote1((__x))(__x)) + +// log10 + +static float + _TG_ATTRS + __tg_log10(float __x) {return log10f(__x);} + +static double + _TG_ATTRS + __tg_log10(double __x) {return log10(__x);} + +static long double + _TG_ATTRS + __tg_log10(long double __x) {return log10l(__x);} + +#undef log10 +#define log10(__x) __tg_log10(__tg_promote1((__x))(__x)) + +// log1p + +static float + _TG_ATTRS + __tg_log1p(float __x) {return log1pf(__x);} + +static double + _TG_ATTRS + __tg_log1p(double __x) {return log1p(__x);} + +static long double + _TG_ATTRS + __tg_log1p(long double __x) {return log1pl(__x);} + +#undef log1p +#define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x)) + +// log2 + +static float + _TG_ATTRS + __tg_log2(float __x) {return log2f(__x);} + +static double + _TG_ATTRS + __tg_log2(double __x) {return log2(__x);} + +static long double + _TG_ATTRS + __tg_log2(long double __x) {return log2l(__x);} + +#undef log2 +#define log2(__x) 
__tg_log2(__tg_promote1((__x))(__x)) + +// logb + +static float + _TG_ATTRS + __tg_logb(float __x) {return logbf(__x);} + +static double + _TG_ATTRS + __tg_logb(double __x) {return logb(__x);} + +static long double + _TG_ATTRS + __tg_logb(long double __x) {return logbl(__x);} + +#undef logb +#define logb(__x) __tg_logb(__tg_promote1((__x))(__x)) + +// lrint + +static long + _TG_ATTRS + __tg_lrint(float __x) {return lrintf(__x);} + +static long + _TG_ATTRS + __tg_lrint(double __x) {return lrint(__x);} + +static long + _TG_ATTRS + __tg_lrint(long double __x) {return lrintl(__x);} + +#undef lrint +#define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x)) + +// lround + +static long + _TG_ATTRS + __tg_lround(float __x) {return lroundf(__x);} + +static long + _TG_ATTRS + __tg_lround(double __x) {return lround(__x);} + +static long + _TG_ATTRS + __tg_lround(long double __x) {return lroundl(__x);} + +#undef lround +#define lround(__x) __tg_lround(__tg_promote1((__x))(__x)) + +// nearbyint + +static float + _TG_ATTRS + __tg_nearbyint(float __x) {return nearbyintf(__x);} + +static double + _TG_ATTRS + __tg_nearbyint(double __x) {return nearbyint(__x);} + +static long double + _TG_ATTRS + __tg_nearbyint(long double __x) {return nearbyintl(__x);} + +#undef nearbyint +#define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x)) + +// nextafter + +static float + _TG_ATTRS + __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);} + +static double + _TG_ATTRS + __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);} + +static long double + _TG_ATTRS + __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);} + +#undef nextafter +#define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// nexttoward + +static float + _TG_ATTRS + __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);} + +static double + _TG_ATTRS + __tg_nexttoward(double __x, long 
double __y) {return nexttoward(__x, __y);} + +static long double + _TG_ATTRS + __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);} + +#undef nexttoward +#define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y)) + +// remainder + +static float + _TG_ATTRS + __tg_remainder(float __x, float __y) {return remainderf(__x, __y);} + +static double + _TG_ATTRS + __tg_remainder(double __x, double __y) {return remainder(__x, __y);} + +static long double + _TG_ATTRS + __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);} + +#undef remainder +#define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y)) + +// remquo + +static float + _TG_ATTRS + __tg_remquo(float __x, float __y, int* __z) + {return remquof(__x, __y, __z);} + +static double + _TG_ATTRS + __tg_remquo(double __x, double __y, int* __z) + {return remquo(__x, __y, __z);} + +static long double + _TG_ATTRS + __tg_remquo(long double __x,long double __y, int* __z) + {return remquol(__x, __y, __z);} + +#undef remquo +#define remquo(__x, __y, __z) \ + __tg_remquo(__tg_promote2((__x), (__y))(__x), \ + __tg_promote2((__x), (__y))(__y), \ + (__z)) + +// rint + +static float + _TG_ATTRS + __tg_rint(float __x) {return rintf(__x);} + +static double + _TG_ATTRS + __tg_rint(double __x) {return rint(__x);} + +static long double + _TG_ATTRS + __tg_rint(long double __x) {return rintl(__x);} + +#undef rint +#define rint(__x) __tg_rint(__tg_promote1((__x))(__x)) + +// round + +static float + _TG_ATTRS + __tg_round(float __x) {return roundf(__x);} + +static double + _TG_ATTRS + __tg_round(double __x) {return round(__x);} + +static long double + _TG_ATTRS + __tg_round(long double __x) {return roundl(__x);} + +#undef round +#define round(__x) __tg_round(__tg_promote1((__x))(__x)) + +// scalbn + +static float + _TG_ATTRS + __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);} + +static double + _TG_ATTRS + 
__tg_scalbn(double __x, int __y) {return scalbn(__x, __y);} + +static long double + _TG_ATTRS + __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);} + +#undef scalbn +#define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y) + +// scalbln + +static float + _TG_ATTRS + __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);} + +static double + _TG_ATTRS + __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);} + +static long double + _TG_ATTRS + __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);} + +#undef scalbln +#define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y) + +// tgamma + +static float + _TG_ATTRS + __tg_tgamma(float __x) {return tgammaf(__x);} + +static double + _TG_ATTRS + __tg_tgamma(double __x) {return tgamma(__x);} + +static long double + _TG_ATTRS + __tg_tgamma(long double __x) {return tgammal(__x);} + +#undef tgamma +#define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x)) + +// trunc + +static float + _TG_ATTRS + __tg_trunc(float __x) {return truncf(__x);} + +static double + _TG_ATTRS + __tg_trunc(double __x) {return trunc(__x);} + +static long double + _TG_ATTRS + __tg_trunc(long double __x) {return truncl(__x);} + +#undef trunc +#define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x)) + +// carg + +static float + _TG_ATTRS + __tg_carg(float __x) {return atan2f(0.F, __x);} + +static double + _TG_ATTRS + __tg_carg(double __x) {return atan2(0., __x);} + +static long double + _TG_ATTRS + __tg_carg(long double __x) {return atan2l(0.L, __x);} + +static float + _TG_ATTRS + __tg_carg(float _Complex __x) {return cargf(__x);} + +static double + _TG_ATTRS + __tg_carg(double _Complex __x) {return carg(__x);} + +static long double + _TG_ATTRS + __tg_carg(long double _Complex __x) {return cargl(__x);} + +#undef carg +#define carg(__x) __tg_carg(__tg_promote1((__x))(__x)) + +// cimag + +static float + _TG_ATTRS + __tg_cimag(float __x) {return 0;} + +static double + _TG_ATTRS + 
__tg_cimag(double __x) {return 0;} + +static long double + _TG_ATTRS + __tg_cimag(long double __x) {return 0;} + +static float + _TG_ATTRS + __tg_cimag(float _Complex __x) {return cimagf(__x);} + +static double + _TG_ATTRS + __tg_cimag(double _Complex __x) {return cimag(__x);} + +static long double + _TG_ATTRS + __tg_cimag(long double _Complex __x) {return cimagl(__x);} + +#undef cimag +#define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x)) + +// conj + +static float _Complex + _TG_ATTRS + __tg_conj(float __x) {return __x;} + +static double _Complex + _TG_ATTRS + __tg_conj(double __x) {return __x;} + +static long double _Complex + _TG_ATTRS + __tg_conj(long double __x) {return __x;} + +static float _Complex + _TG_ATTRS + __tg_conj(float _Complex __x) {return conjf(__x);} + +static double _Complex + _TG_ATTRS + __tg_conj(double _Complex __x) {return conj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_conj(long double _Complex __x) {return conjl(__x);} + +#undef conj +#define conj(__x) __tg_conj(__tg_promote1((__x))(__x)) + +// cproj + +static float _Complex + _TG_ATTRS + __tg_cproj(float __x) {return cprojf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cproj(double __x) {return cproj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cproj(long double __x) {return cprojl(__x);} + +static float _Complex + _TG_ATTRS + __tg_cproj(float _Complex __x) {return cprojf(__x);} + +static double _Complex + _TG_ATTRS + __tg_cproj(double _Complex __x) {return cproj(__x);} + +static long double _Complex + _TG_ATTRS + __tg_cproj(long double _Complex __x) {return cprojl(__x);} + +#undef cproj +#define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x)) + +// creal + +static float + _TG_ATTRS + __tg_creal(float __x) {return __x;} + +static double + _TG_ATTRS + __tg_creal(double __x) {return __x;} + +static long double + _TG_ATTRS + __tg_creal(long double __x) {return __x;} + +static float + _TG_ATTRS + __tg_creal(float _Complex __x) {return crealf(__x);} + 
+static double + _TG_ATTRS + __tg_creal(double _Complex __x) {return creal(__x);} + +static long double + _TG_ATTRS + __tg_creal(long double _Complex __x) {return creall(__x);} + +#undef creal +#define creal(__x) __tg_creal(__tg_promote1((__x))(__x)) + +#undef _TG_ATTRSp +#undef _TG_ATTRS + +#endif /* __cplusplus */ +#endif /* __TGMATH_H */ diff --git a/python/clang/5.1/include/tmmintrin.h b/python/clang/5.1/include/tmmintrin.h new file mode 100644 index 00000000..4238f5b3 --- /dev/null +++ b/python/clang/5.1/include/tmmintrin.h @@ -0,0 +1,225 @@ +/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __TMMINTRIN_H +#define __TMMINTRIN_H + +#ifndef __SSSE3__ +#error "SSSE3 instruction set not enabled" +#else + +#include <pmmintrin.h> + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_abs_pi8(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsb((__v8qi)__a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_abs_epi8(__m128i __a) +{ + return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_abs_pi16(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsw((__v4hi)__a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_abs_epi16(__m128i __a) +{ + return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_abs_pi32(__m64 __a) +{ + return (__m64)__builtin_ia32_pabsd((__v2si)__a); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_abs_epi32(__m128i __a) +{ + return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); +} + +#define _mm_alignr_epi8(a, b, n) __extension__ ({ \ + __m128i __a = (a); \ + __m128i __b = (b); \ + (__m128i)__builtin_ia32_palignr128((__v16qi)__a, (__v16qi)__b, (n)); }) + +#define _mm_alignr_pi8(a, b, n) __extension__ ({ \ + __m64 __a = (a); \ + __m64 __b = (b); \ + (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 
+_mm_hadd_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hadd_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hadds_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hadds_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hsub_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_hsubs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maddubs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, 
(__v16qi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_maddubs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_mulhrs_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mulhrs_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shuffle_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_shuffle_pi8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sign_epi8(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sign_epi16(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sign_epi32(__m128i __a, __m128i __b) +{ + return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sign_pi8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sign_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, 
__nodebug__)) +_mm_sign_pi32(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); +} + +#endif /* __SSSE3__ */ + +#endif /* __TMMINTRIN_H */ diff --git a/python/clang/5.1/include/unwind.h b/python/clang/5.1/include/unwind.h new file mode 100644 index 00000000..685c1dfd --- /dev/null +++ b/python/clang/5.1/include/unwind.h @@ -0,0 +1,280 @@ +/*===---- unwind.h - Stack unwinding ----------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +/* See "Data Definitions for libgcc_s" in the Linux Standard Base.*/ + +#ifndef __CLANG_UNWIND_H +#define __CLANG_UNWIND_H + +#if __has_include_next(<unwind.h>) +/* Darwin (from 11.x on) and libunwind provide an unwind.h. If that's available, + * use it.
libunwind wraps some of its definitions in #ifdef _GNU_SOURCE, + * so define that around the include.*/ +# ifndef _GNU_SOURCE +# define _SHOULD_UNDEFINE_GNU_SOURCE +# define _GNU_SOURCE +# endif +// libunwind's unwind.h reflects the current visibility. However, Mozilla +// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the +// visibility to default and export its contents. gcc also allows users to +// override its override by #defining HIDE_EXPORTS (but note, this only obeys +// the user's -fvisibility setting; it doesn't hide any exports on its own). We +// imitate gcc's header here: +# ifdef HIDE_EXPORTS +# include_next <unwind.h> +# else +# pragma GCC visibility push(default) +# include_next <unwind.h> +# pragma GCC visibility pop +# endif +# ifdef _SHOULD_UNDEFINE_GNU_SOURCE +# undef _GNU_SOURCE +# undef _SHOULD_UNDEFINE_GNU_SOURCE +# endif +#else + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* It is a bit strange for a header to play with the visibility of the + symbols it declares, but this matches gcc's behavior and some programs + depend on it */ +#ifndef HIDE_EXPORTS +#pragma GCC visibility push(default) +#endif + +typedef uintptr_t _Unwind_Word; +typedef intptr_t _Unwind_Sword; +typedef uintptr_t _Unwind_Ptr; +typedef uintptr_t _Unwind_Internal_Ptr; +typedef uint64_t _Unwind_Exception_Class; + +typedef intptr_t _sleb128_t; +typedef uintptr_t _uleb128_t; + +struct _Unwind_Context; +struct _Unwind_Exception; +typedef enum { + _URC_NO_REASON = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8 +} _Unwind_Reason_Code; + +typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */ +} _Unwind_Action; + +typedef void
(*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code, + struct _Unwind_Exception *); + +struct _Unwind_Exception { + _Unwind_Exception_Class exception_class; + _Unwind_Exception_Cleanup_Fn exception_cleanup; + _Unwind_Word private_1; + _Unwind_Word private_2; + /* The Itanium ABI requires that _Unwind_Exception objects are "double-word + * aligned". GCC has interpreted this to mean "use the maximum useful + * alignment for the target"; so do we. */ +} __attribute__((__aligned__)); + +typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, + _Unwind_Exception_Class, + struct _Unwind_Exception *, + struct _Unwind_Context *, + void *); + +typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)( + int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *, + struct _Unwind_Context *); +typedef _Unwind_Personality_Fn __personality_routine; + +typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, + void *); + +#if defined(__arm__) && !defined(__APPLE__) + +typedef enum { + _UVRSC_CORE = 0, /* integer register */ + _UVRSC_VFP = 1, /* vfp */ + _UVRSC_WMMXD = 3, /* Intel WMMX data register */ + _UVRSC_WMMXC = 4 /* Intel WMMX control register */ +} _Unwind_VRS_RegClass; + +typedef enum { + _UVRSD_UINT32 = 0, + _UVRSD_VFPX = 1, + _UVRSD_UINT64 = 3, + _UVRSD_FLOAT = 4, + _UVRSD_DOUBLE = 5 +} _Unwind_VRS_DataRepresentation; + +typedef enum { + _UVRSR_OK = 0, + _UVRSR_NOT_IMPLEMENTED = 1, + _UVRSR_FAILED = 2 +} _Unwind_VRS_Result; + +_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context, + _Unwind_VRS_RegClass __regclass, + uint32_t __regno, + _Unwind_VRS_DataRepresentation __representation, + void *__valuep); + +_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context, + _Unwind_VRS_RegClass __regclass, + uint32_t __regno, + _Unwind_VRS_DataRepresentation __representation, + void *__valuep); + +static __inline__ +_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) { + _Unwind_Word 
__value; + _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); + return __value; +} + +static __inline__ +void _Unwind_SetGR(struct _Unwind_Context *__context, int __index, + _Unwind_Word __value) { + _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value); +} + +static __inline__ +_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) { + _Unwind_Word __ip = _Unwind_GetGR(__context, 15); + return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */ +} + +static __inline__ +void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) { + _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1; + _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit); +} +#else +_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int); +void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word); + +_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *); +void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word); +#endif + + +_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *); + +_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *); + +void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *); + +_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); + +/* DWARF EH functions; currently not available on Darwin/ARM */ +#if !defined(__APPLE__) || !defined(__arm__) + +_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); +void _Unwind_DeleteException(struct _Unwind_Exception *); +void _Unwind_Resume(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *); + +#endif + +_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + +/* setjmp(3)/longjmp(3) stuff */ +typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t; + +void _Unwind_SjLj_Register(_Unwind_FunctionContext_t); +void 
_Unwind_SjLj_Unregister(_Unwind_FunctionContext_t); +_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); +void _Unwind_SjLj_Resume(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *); + +void *_Unwind_FindEnclosingFunction(void *); + +#ifdef __APPLE__ + +_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *) + __attribute__((unavailable)); +_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *) + __attribute__((unavailable)); + +/* Darwin-specific functions */ +void __register_frame(const void *); +void __deregister_frame(const void *); + +struct dwarf_eh_bases { + uintptr_t tbase; + uintptr_t dbase; + uintptr_t func; +}; +void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *); + +void __register_frame_info_bases(const void *, void *, void *, void *) + __attribute__((unavailable)); +void __register_frame_info(const void *, void *) __attribute__((unavailable)); +void __register_frame_info_table_bases(const void *, void*, void *, void *) + __attribute__((unavailable)); +void __register_frame_info_table(const void *, void *) + __attribute__((unavailable)); +void __register_frame_table(const void *) __attribute__((unavailable)); +void __deregister_frame_info(const void *) __attribute__((unavailable)); +void __deregister_frame_info_bases(const void *)__attribute__((unavailable)); + +#else + +_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *); +_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *); + +#endif + + +#ifndef HIDE_EXPORTS +#pragma GCC visibility pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif + +#endif /* __CLANG_UNWIND_H */ diff --git a/python/clang/5.1/include/varargs.h b/python/clang/5.1/include/varargs.h new file mode 100644 index 00000000..b5477d0a --- /dev/null +++ b/python/clang/5.1/include/varargs.h @@ -0,0 +1,26 @@ +/*===---- 
varargs.h - Variable argument handling -------------------------------------=== +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. 
+* +*===-----------------------------------------------------------------------=== +*/ +#ifndef __VARARGS_H +#define __VARARGS_H + #error "Please use <stdarg.h> instead of <varargs.h>" +#endif diff --git a/python/clang/5.1/include/wmmintrin.h b/python/clang/5.1/include/wmmintrin.h new file mode 100644 index 00000000..369e3c20 --- /dev/null +++ b/python/clang/5.1/include/wmmintrin.h @@ -0,0 +1,42 @@ +/*===---- wmmintrin.h - AES intrinsics ------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE.
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _WMMINTRIN_H +#define _WMMINTRIN_H + +#include <emmintrin.h> + +#if !defined (__AES__) && !defined (__PCLMUL__) +# error "AES/PCLMUL instructions not enabled" +#else + +#ifdef __AES__ +#include <__wmmintrin_aes.h> +#endif /* __AES__ */ + +#ifdef __PCLMUL__ +#include <__wmmintrin_pclmul.h> +#endif /* __PCLMUL__ */ + +#endif /* __AES__ || __PCLMUL__ */ +#endif /* _WMMINTRIN_H */ diff --git a/python/clang/5.1/include/x86intrin.h b/python/clang/5.1/include/x86intrin.h new file mode 100644 index 00000000..399016f1 --- /dev/null +++ b/python/clang/5.1/include/x86intrin.h @@ -0,0 +1,79 @@ +/*===---- x86intrin.h - X86 intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE.
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#define __X86INTRIN_H + +#include <immintrin.h> + +#ifdef __3dNOW__ +#include <mm3dnow.h> +#endif + +#ifdef __BMI__ +#include <bmiintrin.h> +#endif + +#ifdef __BMI2__ +#include <bmi2intrin.h> +#endif + +#ifdef __LZCNT__ +#include <lzcntintrin.h> +#endif + +#ifdef __POPCNT__ +#include <popcntintrin.h> +#endif + +#ifdef __RDSEED__ +#include <rdseedintrin.h> +#endif + +#ifdef __PRFCHW__ +#include <prfchwintrin.h> +#endif + +#ifdef __SSE4A__ +#include <ammintrin.h> +#endif + +#ifdef __FMA4__ +#include <fma4intrin.h> +#endif + +#ifdef __XOP__ +#include <xopintrin.h> +#endif + +#ifdef __TBM__ +#include <tbmintrin.h> +#endif + +#ifdef __F16C__ +#include <f16cintrin.h> +#endif + +// FIXME: LWP + +#endif /* __X86INTRIN_H */ diff --git a/python/clang/5.1/include/xmmintrin.h b/python/clang/5.1/include/xmmintrin.h new file mode 100644 index 00000000..c68d3ed7 --- /dev/null +++ b/python/clang/5.1/include/xmmintrin.h @@ -0,0 +1,1001 @@ +/*===---- xmmintrin.h - SSE intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __XMMINTRIN_H +#define __XMMINTRIN_H + +#ifndef __SSE__ +#error "SSE instruction set not enabled" +#else + +#include <mmintrin.h> + +typedef int __v4si __attribute__((__vector_size__(16))); +typedef float __v4sf __attribute__((__vector_size__(16))); +typedef float __m128 __attribute__((__vector_size__(16))); + +// This header should only be included in a hosted environment as it depends on +// a standard library to provide allocation routines. +#if __STDC_HOSTED__ +#include <mm_malloc.h> +#endif + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_add_ss(__m128 __a, __m128 __b) +{ + __a[0] += __b[0]; + return __a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_add_ps(__m128 __a, __m128 __b) +{ + return __a + __b; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_ss(__m128 __a, __m128 __b) +{ + __a[0] -= __b[0]; + return __a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_sub_ps(__m128 __a, __m128 __b) +{ + return __a - __b; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_mul_ss(__m128 __a, __m128 __b) +{ + __a[0] *= __b[0]; + return __a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_mul_ps(__m128 __a, __m128 __b) +{ + return __a * __b; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_div_ss(__m128 __a, __m128 __b) +{ + __a[0] /= __b[0]; + return __a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_div_ps(__m128 __a, __m128 __b) +{ + return __a
/ __b; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_sqrt_ss(__m128 __a) +{ + __m128 __c = __builtin_ia32_sqrtss(__a); + return (__m128) { __c[0], __a[1], __a[2], __a[3] }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_sqrt_ps(__m128 __a) +{ + return __builtin_ia32_sqrtps(__a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rcp_ss(__m128 __a) +{ + __m128 __c = __builtin_ia32_rcpss(__a); + return (__m128) { __c[0], __a[1], __a[2], __a[3] }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rcp_ps(__m128 __a) +{ + return __builtin_ia32_rcpps(__a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rsqrt_ss(__m128 __a) +{ + __m128 __c = __builtin_ia32_rsqrtss(__a); + return (__m128) { __c[0], __a[1], __a[2], __a[3] }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_rsqrt_ps(__m128 __a) +{ + return __builtin_ia32_rsqrtps(__a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_min_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_minss(__a, __b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_min_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_minps(__a, __b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_max_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_maxss(__a, __b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_max_ps(__m128 __a, __m128 __b) +{ + return __builtin_ia32_maxps(__a, __b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_and_ps(__m128 __a, __m128 __b) +{ + return (__m128)((__v4si)__a & (__v4si)__b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_andnot_ps(__m128 __a, __m128 __b) +{ + return 
(__m128)(~(__v4si)__a & (__v4si)__b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_or_ps(__m128 __a, __m128 __b) +{ + return (__m128)((__v4si)__a | (__v4si)__b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_xor_ps(__m128 __a, __m128 __b) +{ + return (__m128)((__v4si)__a ^ (__v4si)__b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpss(__a, __b, 0); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpeq_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__a, __b, 0); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpss(__a, __b, 1); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmplt_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__a, __b, 1); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpss(__a, __b, 2); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmple_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__a, __b, 2); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 1), + 4, 1, 2, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpgt_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__b, __a, 1); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 2), + 4, 1, 
2, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpge_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__b, __a, 2); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpss(__a, __b, 4); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpneq_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__a, __b, 4); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnlt_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpss(__a, __b, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnlt_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__a, __b, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnle_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpss(__a, __b, 6); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnle_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__a, __b, 6); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpngt_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 5), + 4, 1, 2, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpngt_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__b, __a, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnge_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_shufflevector(__a, + __builtin_ia32_cmpss(__b, __a, 6), + 4, 1, 2, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpnge_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__b, __a, 
6); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpord_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpss(__a, __b, 7); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpord_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__a, __b, 7); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpunord_ss(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpss(__a, __b, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cmpunord_ps(__m128 __a, __m128 __b) +{ + return (__m128)__builtin_ia32_cmpps(__a, __b, 3); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comieq_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comieq(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comilt_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comilt(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comile_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comile(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comigt_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comigt(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comige_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comige(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_comineq_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_comineq(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomieq_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomieq(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomilt_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomilt(__a, __b); +} + +static 
__inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomile_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomile(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomigt_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomigt(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomige_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomige(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_ucomineq_ss(__m128 __a, __m128 __b) +{ + return __builtin_ia32_ucomineq(__a, __b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtss_si32(__m128 __a) +{ + return __builtin_ia32_cvtss2si(__a); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvt_ss2si(__m128 __a) +{ + return _mm_cvtss_si32(__a); +} + +#ifdef __x86_64__ + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvtss_si64(__m128 __a) +{ + return __builtin_ia32_cvtss2si64(__a); +} + +#endif + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtps_pi32(__m128 __a) +{ + return (__m64)__builtin_ia32_cvtps2pi(__a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvt_ps2pi(__m128 __a) +{ + return _mm_cvtps_pi32(__a); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvttss_si32(__m128 __a) +{ + return __a[0]; +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_cvtt_ss2si(__m128 __a) +{ + return _mm_cvttss_si32(__a); +} + +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +_mm_cvttss_si64(__m128 __a) +{ + return __a[0]; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvttps_pi32(__m128 __a) +{ + return (__m64)__builtin_ia32_cvttps2pi(__a); +} + +static __inline__ __m64 
__attribute__((__always_inline__, __nodebug__)) +_mm_cvtt_ps2pi(__m128 __a) +{ + return _mm_cvttps_pi32(__a); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi32_ss(__m128 __a, int __b) +{ + __a[0] = __b; + return __a; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvt_si2ss(__m128 __a, int __b) +{ + return _mm_cvtsi32_ss(__a, __b); +} + +#ifdef __x86_64__ + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtsi64_ss(__m128 __a, long long __b) +{ + __a[0] = __b; + return __a; +} + +#endif + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpi32_ps(__m128 __a, __m64 __b) +{ + return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvt_pi2ps(__m128 __a, __m64 __b) +{ + return _mm_cvtpi32_ps(__a, __b); +} + +static __inline__ float __attribute__((__always_inline__, __nodebug__)) +_mm_cvtss_f32(__m128 __a) +{ + return __a[0]; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_loadh_pi(__m128 __a, const __m64 *__p) +{ + typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); + struct __mm_loadh_pi_struct { + __mm_loadh_pi_v2f32 __u; + } __attribute__((__packed__, __may_alias__)); + __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u; + __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); + return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_loadl_pi(__m128 __a, const __m64 *__p) +{ + typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); + struct __mm_loadl_pi_struct { + __mm_loadl_pi_v2f32 __u; + } __attribute__((__packed__, __may_alias__)); + __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u; + __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); + return 
__builtin_shufflevector(__a, __bb, 4, 5, 2, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_load_ss(const float *__p) +{ + struct __mm_load_ss_struct { + float __u; + } __attribute__((__packed__, __may_alias__)); + float __u = ((struct __mm_load_ss_struct*)__p)->__u; + return (__m128){ __u, 0, 0, 0 }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_load1_ps(const float *__p) +{ + struct __mm_load1_ps_struct { + float __u; + } __attribute__((__packed__, __may_alias__)); + float __u = ((struct __mm_load1_ps_struct*)__p)->__u; + return (__m128){ __u, __u, __u, __u }; +} + +#define _mm_load_ps1(p) _mm_load1_ps(p) + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_load_ps(const float *__p) +{ + return *(__m128*)__p; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_loadu_ps(const float *__p) +{ + struct __loadu_ps { + __m128 __v; + } __attribute__((__packed__, __may_alias__)); + return ((struct __loadu_ps*)__p)->__v; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_loadr_ps(const float *__p) +{ + __m128 __a = _mm_load_ps(__p); + return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_set_ss(float __w) +{ + return (__m128){ __w, 0, 0, 0 }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_set1_ps(float __w) +{ + return (__m128){ __w, __w, __w, __w }; +} + +// Microsoft specific. 
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_set_ps1(float __w) +{ + return _mm_set1_ps(__w); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_set_ps(float __z, float __y, float __x, float __w) +{ + return (__m128){ __w, __x, __y, __z }; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_setr_ps(float __z, float __y, float __x, float __w) +{ + return (__m128){ __z, __y, __x, __w }; +} + +static __inline__ __m128 __attribute__((__always_inline__)) +_mm_setzero_ps(void) +{ + return (__m128){ 0, 0, 0, 0 }; +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_storeh_pi(__m64 *__p, __m128 __a) +{ + __builtin_ia32_storehps((__v2si *)__p, __a); +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_storel_pi(__m64 *__p, __m128 __a) +{ + __builtin_ia32_storelps((__v2si *)__p, __a); +} + +static __inline__ void __attribute__((__always_inline__)) +_mm_store_ss(float *__p, __m128 __a) +{ + struct __mm_store_ss_struct { + float __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storeu_ps(float *__p, __m128 __a) +{ + __builtin_ia32_storeups(__p, __a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store1_ps(float *__p, __m128 __a) +{ + __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0); + _mm_storeu_ps(__p, __a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_ps1(float *__p, __m128 __a) +{ + return _mm_store1_ps(__p, __a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_store_ps(float *__p, __m128 __a) +{ + *(__m128 *)__p = __a; +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_storer_ps(float *__p, __m128 __a) +{ + __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0); + 
_mm_store_ps(__p, __a); +} + +#define _MM_HINT_T0 3 +#define _MM_HINT_T1 2 +#define _MM_HINT_T2 1 +#define _MM_HINT_NTA 0 + +/* FIXME: We have to #define this because "sel" must be a constant integer, and + Sema doesn't do any form of constant propagation yet. */ + +#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel))) + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_pi(__m64 *__p, __m64 __a) +{ + __builtin_ia32_movntq(__p, __a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_stream_ps(float *__p, __m128 __a) +{ + __builtin_ia32_movntps(__p, __a); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_sfence(void) +{ + __builtin_ia32_sfence(); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_extract_pi16(__m64 __a, int __n) +{ + __v4hi __b = (__v4hi)__a; + return (unsigned short)__b[__n & 3]; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_insert_pi16(__m64 __a, int __d, int __n) +{ + __v4hi __b = (__v4hi)__a; + __b[__n & 3] = __d; + return (__m64)__b; +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_max_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_max_pu8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_min_pi16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_min_pu8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_movemask_pi8(__m64 __a) +{ + return 
__builtin_ia32_pmovmskb((__v8qi)__a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_mulhi_pu16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); +} + +#define _mm_shuffle_pi16(a, n) __extension__ ({ \ + __m64 __a = (a); \ + (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); }) + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) +{ + __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_avg_pu8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_avg_pu16(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_sad_pu8(__m64 __a, __m64 __b) +{ + return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +_mm_getcsr(void) +{ + return __builtin_ia32_stmxcsr(); +} + +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +_mm_setcsr(unsigned int __i) +{ + __builtin_ia32_ldmxcsr(__i); +} + +#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ + __m128 __a = (a); \ + __m128 __b = (b); \ + (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \ + (mask) & 0x3, ((mask) & 0xc) >> 2, \ + (((mask) & 0x30) >> 4) + 4, \ + (((mask) & 0xc0) >> 6) + 4); }) + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_unpackhi_ps(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_unpacklo_ps(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); +} + 
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_move_ss(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 4, 1, 2, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_movehl_ps(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 6, 7, 2, 3); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_movelh_ps(__m128 __a, __m128 __b) +{ + return __builtin_shufflevector(__a, __b, 0, 1, 4, 5); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpi16_ps(__m64 __a) +{ + __m64 __b, __c; + __m128 __r; + + __b = _mm_setzero_si64(); + __b = _mm_cmpgt_pi16(__b, __a); + __c = _mm_unpackhi_pi16(__a, __b); + __r = _mm_setzero_ps(); + __r = _mm_cvtpi32_ps(__r, __c); + __r = _mm_movelh_ps(__r, __r); + __c = _mm_unpacklo_pi16(__a, __b); + __r = _mm_cvtpi32_ps(__r, __c); + + return __r; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpu16_ps(__m64 __a) +{ + __m64 __b, __c; + __m128 __r; + + __b = _mm_setzero_si64(); + __c = _mm_unpackhi_pi16(__a, __b); + __r = _mm_setzero_ps(); + __r = _mm_cvtpi32_ps(__r, __c); + __r = _mm_movelh_ps(__r, __r); + __c = _mm_unpacklo_pi16(__a, __b); + __r = _mm_cvtpi32_ps(__r, __c); + + return __r; +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpi8_ps(__m64 __a) +{ + __m64 __b; + + __b = _mm_setzero_si64(); + __b = _mm_cmpgt_pi8(__b, __a); + __b = _mm_unpacklo_pi8(__a, __b); + + return _mm_cvtpi16_ps(__b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpu8_ps(__m64 __a) +{ + __m64 __b; + + __b = _mm_setzero_si64(); + __b = _mm_unpacklo_pi8(__a, __b); + + return _mm_cvtpi16_ps(__b); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtpi32x2_ps(__m64 __a, __m64 __b) +{ + __m128 __c; + + __c = _mm_setzero_ps(); + __c = _mm_cvtpi32_ps(__c, 
__b); + __c = _mm_movelh_ps(__c, __c); + + return _mm_cvtpi32_ps(__c, __a); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtps_pi16(__m128 __a) +{ + __m64 __b, __c; + + __b = _mm_cvtps_pi32(__a); + __a = _mm_movehl_ps(__a, __a); + __c = _mm_cvtps_pi32(__a); + + return _mm_packs_pi16(__b, __c); +} + +static __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) +_mm_cvtps_pi8(__m128 __a) +{ + __m64 __b, __c; + + __b = _mm_cvtps_pi16(__a); + __c = _mm_setzero_si64(); + + return _mm_packs_pi16(__b, __c); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_mm_movemask_ps(__m128 __a) +{ + return __builtin_ia32_movmskps(__a); +} + +#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) + +#define _MM_EXCEPT_INVALID (0x0001) +#define _MM_EXCEPT_DENORM (0x0002) +#define _MM_EXCEPT_DIV_ZERO (0x0004) +#define _MM_EXCEPT_OVERFLOW (0x0008) +#define _MM_EXCEPT_UNDERFLOW (0x0010) +#define _MM_EXCEPT_INEXACT (0x0020) +#define _MM_EXCEPT_MASK (0x003f) + +#define _MM_MASK_INVALID (0x0080) +#define _MM_MASK_DENORM (0x0100) +#define _MM_MASK_DIV_ZERO (0x0200) +#define _MM_MASK_OVERFLOW (0x0400) +#define _MM_MASK_UNDERFLOW (0x0800) +#define _MM_MASK_INEXACT (0x1000) +#define _MM_MASK_MASK (0x1f80) + +#define _MM_ROUND_NEAREST (0x0000) +#define _MM_ROUND_DOWN (0x2000) +#define _MM_ROUND_UP (0x4000) +#define _MM_ROUND_TOWARD_ZERO (0x6000) +#define _MM_ROUND_MASK (0x6000) + +#define _MM_FLUSH_ZERO_MASK (0x8000) +#define _MM_FLUSH_ZERO_ON (0x8000) +#define _MM_FLUSH_ZERO_OFF (0x0000) + +#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK) +#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK) +#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK) +#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK) + +#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x))) +#define _MM_SET_EXCEPTION_STATE(x) 
(_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x))) +#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x))) +#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x))) + +#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ +do { \ + __m128 tmp3, tmp2, tmp1, tmp0; \ + tmp0 = _mm_unpacklo_ps((row0), (row1)); \ + tmp2 = _mm_unpacklo_ps((row2), (row3)); \ + tmp1 = _mm_unpackhi_ps((row0), (row1)); \ + tmp3 = _mm_unpackhi_ps((row2), (row3)); \ + (row0) = _mm_movelh_ps(tmp0, tmp2); \ + (row1) = _mm_movehl_ps(tmp2, tmp0); \ + (row2) = _mm_movelh_ps(tmp1, tmp3); \ + (row3) = _mm_movehl_ps(tmp3, tmp1); \ +} while (0) + +/* Aliases for compatibility. */ +#define _m_pextrw _mm_extract_pi16 +#define _m_pinsrw _mm_insert_pi16 +#define _m_pmaxsw _mm_max_pi16 +#define _m_pmaxub _mm_max_pu8 +#define _m_pminsw _mm_min_pi16 +#define _m_pminub _mm_min_pu8 +#define _m_pmovmskb _mm_movemask_pi8 +#define _m_pmulhuw _mm_mulhi_pu16 +#define _m_pshufw _mm_shuffle_pi16 +#define _m_maskmovq _mm_maskmove_si64 +#define _m_pavgb _mm_avg_pu8 +#define _m_pavgw _mm_avg_pu16 +#define _m_psadbw _mm_sad_pu8 +#define _m_ _mm_ +#define _m_ _mm_ + +/* Ugly hack for backwards-compatibility (compatible with gcc) */ +#ifdef __SSE2__ +#include <emmintrin.h> +#endif + +#endif /* __SSE__ */ + +#endif /* __XMMINTRIN_H */ diff --git a/python/clang/5.1/include/xopintrin.h b/python/clang/5.1/include/xopintrin.h new file mode 100644 index 00000000..cc94ca02 --- /dev/null +++ b/python/clang/5.1/include/xopintrin.h @@ -0,0 +1,804 @@ +/*===---- xopintrin.h - XOP intrinsics -------------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and 
to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef __XOPINTRIN_H +#define __XOPINTRIN_H + +#ifndef __XOP__ +# error "XOP instruction set is not enabled" +#else + +#include <fma4intrin.h> + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 
+_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddw_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epi8(__m128i __A) +{ + return 
(__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddw_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epu16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubw_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 
+_mm_hsubd_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubq_epi32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) +{ + return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); +} + +#define _mm_roti_epi8(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) + +#define _mm_roti_epi16(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) + +#define _mm_roti_epi32(A, N) __extension__ ({ \ + __m128i __A = (A); \ + 
(__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) + +#define _mm_roti_epi64(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); +} + +#define _mm_com_epu8(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) + +#define _mm_com_epu16(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + 
(__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) + +#define _mm_com_epu32(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) + +#define _mm_com_epu64(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) + +#define _mm_com_epi8(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) + +#define _mm_com_epi16(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) + +#define _mm_com_epi32(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) + +#define _mm_com_epi64(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) + +#define _MM_PCOMCTRL_LT 0 +#define _MM_PCOMCTRL_LE 1 +#define _MM_PCOMCTRL_GT 2 +#define _MM_PCOMCTRL_GE 3 +#define _MM_PCOMCTRL_EQ 4 +#define _MM_PCOMCTRL_NEQ 5 +#define _MM_PCOMCTRL_FALSE 6 +#define _MM_PCOMCTRL_TRUE 7 + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); +} + 
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epu8(__m128i __A, __m128i __B) +{ + return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i 
__attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epu16(__m128i __A, __m128i __B) +{ + return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epu32(__m128i __A, __m128i __B) +{ + return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, 
__nodebug__)) +_mm_comgt_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epu64(__m128i __A, __m128i __B) +{ + return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epi8(__m128i __A, 
__m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); +} +/* Rest of the _epi8 group (false, true): each wrapper passes __A and __B to _mm_com_epi8 with the _MM_PCOMCTRL_* selector named in its own suffix and returns that result unchanged. _mm_com_epi8 and the selectors are defined outside this chunk. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epi8(__m128i __A, __m128i __B) +{ + return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); +} +/* _mm_comlt/le/gt/ge/eq/neq/false/true_epi16: identical shape, forwarding to _mm_com_epi16 (defined outside this chunk). NOTE(review): the _epi16 suffix suggests signed 16-bit elements -- confirm against _mm_com_epi16. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epi16(__m128i __A, __m128i __B) +{ + return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epi32(__m128i __A, __m128i __B) +{ + return 
_mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); +} +/* Rest of the _epi32 group (le, gt, ge, eq, neq, false, true): each wrapper passes __A and __B to _mm_com_epi32 with the _MM_PCOMCTRL_* selector named in its own suffix and returns that result unchanged. _mm_com_epi32 and the selectors are defined outside this chunk. NOTE(review): the _epi32 suffix suggests signed 32-bit elements -- confirm against _mm_com_epi32. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epi32(__m128i __A, __m128i __B) +{ + return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); +} +/* _mm_com*_epi64 wrappers: identical shape, forwarding to _mm_com_epi64 (defined outside this chunk). NOTE(review): presumably signed 64-bit elements -- confirm against _mm_com_epi64. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comlt_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comle_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comgt_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comge_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, 
_MM_PCOMCTRL_GE); +} +/* Rest of the _epi64 group (eq, neq, false, true): each wrapper passes __A and __B to _mm_com_epi64 with the _MM_PCOMCTRL_* selector named in its own suffix and returns that result unchanged. _mm_com_epi64 and the selectors are defined outside this chunk. */ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comeq_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comneq_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comfalse_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_comtrue_epi64(__m128i __A, __m128i __B) +{ + return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); +} +/* _mm_permute2_pd(X, Y, C, I): statement-expression macro. X, Y and C each appear once in the expansion, copied into typed locals (__m128d, __m128d, __m128i); __builtin_ia32_vpermil2pd is then called on those locals cast to __v2df, __v2df and __v2di, with I passed through as the last argument, and the result is cast to __m128d. NOTE(review): I is presumably required to be a compile-time constant, which would explain why this is a macro rather than an inline function -- confirm. */ +#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ + __m128d __X = (X); \ + __m128d __Y = (Y); \ + __m128i __C = (C); \ + (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ + (__v2di)__C, (I)); }) +/* _mm256_permute2_pd(X, Y, C, I): same structure with __m256d/__m256i locals, __v4df/__v4di casts and __builtin_ia32_vpermil2pd256; yields a __m256d. */ +#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ + __m256d __X = (X); \ + __m256d __Y = (Y); \ + __m256i __C = (C); \ + (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ + (__v4di)__C, (I)); }) +/* _mm_permute2_ps(X, Y, C, I): same structure with __m128/__m128i locals, __v4sf/__v4si casts and __builtin_ia32_vpermil2ps; yields a __m128. */ +#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ + __m128 __X = (X); \ + __m128 __Y = (Y); \ + __m128i __C = (C); \ + (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ + (__v4si)__C, (I)); }) +/* _mm256_permute2_ps(X, Y, C, I): same structure with __m256/__m256i locals, __v8sf/__v8si casts and __builtin_ia32_vpermil2ps256; yields a __m256. */ +#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ + __m256 __X = (X); \ + __m256 __Y = (Y); \ + __m256i __C = (C); \ + (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ + (__v8si)__C, (I)); }) +/* _mm_frcz_ss: returns __builtin_ia32_vfrczss applied to __A (cast to __v4sf), cast back to __m128. NOTE(review): presumably the XOP fraction-extract operation on the lowest float only -- confirm against the instruction reference. */ +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_ss(__m128 __A) +{ + return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); +} +/* _mm_frcz_sd: returns __builtin_ia32_vfrczsd applied to __A (cast to __v2df), cast back to __m128d. NOTE(review): presumably the scalar-double form of the same operation -- confirm. */ +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_sd(__m128d __A) +{ + return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); +} + +static __inline__ __m128 __attribute__((__always_inline__, 
__nodebug__)) +_mm_frcz_ps(__m128 __A) +{ + return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); +} +/* _mm_frcz_pd: returns __builtin_ia32_vfrczpd applied to __A (cast to __v2df), cast back to __m128d. NOTE(review): presumably the XOP fraction-extract operation applied to every double element -- confirm against the instruction reference. */ +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_pd(__m128d __A) +{ + return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); +} +/* _mm256_frcz_ps: returns __builtin_ia32_vfrczps256 applied to __A (cast to __v8sf), cast back to __m256. */ +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_frcz_ps(__m256 __A) +{ + return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); +} +/* _mm256_frcz_pd: returns __builtin_ia32_vfrczpd256 applied to __A (cast to __v4df), cast back to __m256d. */ +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_frcz_pd(__m256d __A) +{ + return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); +} + +#endif /* __XOP__ */ + +#endif /* __XOPINTRIN_H */