avx2intrin.h revision 925be547b163675b312e3cac0cc7f37f31d787c1
1925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== 2925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 3925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * Permission is hereby granted, free of charge, to any person obtaining a copy 4925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * of this software and associated documentation files (the "Software"), to deal 5925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * in the Software without restriction, including without limitation the rights 6925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * copies of the Software, and to permit persons to whom the Software is 8925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * furnished to do so, subject to the following conditions: 9925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 10925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * The above copyright notice and this permission notice shall be included in 11925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * all copies or substantial portions of the Software. 12925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 13925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * THE SOFTWARE. 20925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper * 21925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper *===-----------------------------------------------------------------------=== 22925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper */ 23925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 24925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#ifndef __IMMINTRIN_H 25925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead." 26925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#endif 27925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 28925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper/* SSE4 Multiple Packed Sums of Absolute Difference. */ 29925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper#define _mm256_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw256((X), (Y), (M)) 30925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 31925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 32925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi8(__m256i a) 33925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 34925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsb256((__v32qi)a); 35925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 36925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 37925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 38925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi16(__m256i a) 39925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 40925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsw256((__v16hi)a); 41925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 42925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 43925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 44925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_abs_epi32(__m256i a) 45925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 46925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_pabsd256((__v8si)a); 47925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 48925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 49925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 50925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packs_epi16(__m256i a, __m256i b) 51925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 52925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_packsswb256((__v16hi)a, (__v16hi)b); 53925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 54925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 55925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 56925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packs_epi32(__m256i a, __m256i b) 57925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 58925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_packssdw256((__v8si)a, (__v8si)b); 59925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 60925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 61925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 62925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi16(__m256i a, __m256i b) 63925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 64925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)__builtin_ia32_packuswb256((__v16hi)a, (__v16hi)b); 65925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 66925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 67925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 68925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_packus_epi32(__m256i __V1, __m256i __V2) 69925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 70925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); 71925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 72925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 73925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 74925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi8(__m256i a, __m256i b) 75925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 76925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v32qi)a + (__v32qi)b); 77925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 78925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 79925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 80925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi16(__m256i a, __m256i b) 81925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 82925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v16hi)a + (__v16hi)b); 83925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 84925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 85925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 86925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi32(__m256i a, __m256i b) 87925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 88925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v8si)a + (__v8si)b); 89925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 90925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 91925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 92925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_add_epi64(__m256i a, __m256i b) 93925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 94925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return a + b; 95925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 96925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 97925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 98925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi8(__m256i a, __m256i b) 99925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 100925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v32qi)a - (__v32qi)b); 101925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 102925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 103925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 104925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi16(__m256i a, __m256i b) 105925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 106925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v16hi)a - (__v16hi)b); 107925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 108925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 109925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 110925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi32(__m256i a, __m256i b) 111925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 112925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return (__m256i)((__v8si)a - (__v8si)b); 113925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 114925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper 115925be547b163675b312e3cac0cc7f37f31d787c1Craig Topperstatic __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 116925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper_mm256_sub_epi64(__m256i a, __m256i b) 117925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper{ 118925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper return a - b; 119925be547b163675b312e3cac0cc7f37f31d787c1Craig Topper} 120