/* mmintrin.h revision edbd9289b71cd512ae2b3a9b46cdf55b5602aaba */
1/*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 2 * 3 * Copyright (c) 2008 Anders Carlsson 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a copy 6 * of this software and associated documentation files (the "Software"), to deal 7 * in the Software without restriction, including without limitation the rights 8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 * copies of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 * THE SOFTWARE. 
22 * 23 *===-----------------------------------------------------------------------=== 24 */ 25 26#ifndef __MMINTRIN_H 27#define __MMINTRIN_H 28 29#ifndef __MMX__ 30#error "MMX instruction set not enabled" 31#else 32 33typedef long long __m64 __attribute__((__vector_size__(8))); 34 35typedef int __v2si __attribute__((__vector_size__(8))); 36typedef short __v4hi __attribute__((__vector_size__(8))); 37typedef char __v8qi __attribute__((__vector_size__(8))); 38 39inline void __attribute__((__always_inline__)) _mm_empty() 40{ 41 __builtin_ia32_emms(); 42} 43 44inline __m64 __attribute__((__always_inline__)) _mm_cvtsi32_si64(int __i) 45{ 46 return (__m64)(__v2si){__i, 0}; 47} 48 49inline int __attribute__((__always_inline__)) _mm_cvtsi64_si32(__m64 __m) 50{ 51 __v2si __mmx_var2 = (__v2si)__m; 52 return __mmx_var2[0]; 53} 54 55inline __m64 __attribute__((__always_inline__)) _mm_cvtsi64_m64(long long __i) 56{ 57 return (__m64)__i; 58} 59 60inline long long __attribute__((__always_inline__)) _mm_cvtm64_si64(__m64 __m) 61{ 62 return (long long)__m; 63} 64 65inline __m64 __attribute__((__always_inline__)) _mm_packs_pi16(__m64 __m1, __m64 __m2) 66{ 67 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 68} 69 70inline __m64 __attribute__((__always_inline__)) _mm_packs_pi32(__m64 __m1, __m64 __m2) 71{ 72 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 73} 74 75inline __m64 __attribute__((__always_inline__)) _mm_packs_pu16(__m64 __m1, __m64 __m2) 76{ 77 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 78} 79 80inline __m64 __attribute__((__always_inline__)) _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 81{ 82 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); 83} 84 85inline __m64 __attribute__((__always_inline__)) _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 86{ 87 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 4+2, 3, 4+3); 88} 89 90inline __m64 
__attribute__((__always_inline__)) _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 91{ 92 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 2+1); 93} 94 95inline __m64 __attribute__((__always_inline__)) _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 96{ 97 return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); 98} 99 100inline __m64 __attribute__((__always_inline__)) _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 101{ 102 return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4+0, 1, 4+1); 103} 104 105inline __m64 __attribute__((__always_inline__)) _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 106{ 107 return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2+0); 108} 109 110inline __m64 __attribute__((__always_inline__)) _mm_add_pi8(__m64 __m1, __m64 __m2) 111{ 112 return (__m64)((__v8qi)__m1 + (__v8qi)__m2); 113} 114 115inline __m64 __attribute__((__always_inline__)) _mm_add_pi16(__m64 __m1, __m64 __m2) 116{ 117 return (__m64)((__v4hi)__m1 + (__v4hi)__m2); 118} 119 120inline __m64 __attribute__((__always_inline__)) _mm_add_pi32(__m64 __m1, __m64 __m2) 121{ 122 return (__m64)((__v2si)__m1 + (__v2si)__m2); 123} 124 125inline __m64 __attribute__((__always_inline__)) _mm_adds_pi8(__m64 __m1, __m64 __m2) 126{ 127 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 128} 129 130inline __m64 __attribute__((__always_inline__)) _mm_adds_pi16(__m64 __m1, __m64 __m2) 131{ 132 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 133} 134 135inline __m64 __attribute__((__always_inline__)) _mm_adds_pu8(__m64 __m1, __m64 __m2) 136{ 137 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 138} 139 140inline __m64 __attribute__((__always_inline__)) _mm_adds_pu16(__m64 __m1, __m64 __m2) 141{ 142 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 143} 144 145inline __m64 __attribute__((__always_inline__)) _mm_sub_pi8(__m64 __m1, __m64 __m2) 146{ 147 return 
(__m64)((__v8qi)__m1 - (__v8qi)__m2); 148} 149 150inline __m64 __attribute__((__always_inline__)) _mm_sub_pi16(__m64 __m1, __m64 __m2) 151{ 152 return (__m64)((__v4hi)__m1 - (__v4hi)__m2); 153} 154 155inline __m64 __attribute__((__always_inline__)) _mm_sub_pi32(__m64 __m1, __m64 __m2) 156{ 157 return (__m64)((__v2si)__m1 - (__v2si)__m2); 158} 159 160inline __m64 __attribute__((__always_inline__)) _mm_subs_pi8(__m64 __m1, __m64 __m2) 161{ 162 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 163} 164 165inline __m64 __attribute__((__always_inline__)) _mm_subs_pi16(__m64 __m1, __m64 __m2) 166{ 167 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 168} 169 170inline __m64 __attribute__((__always_inline__)) _mm_subs_pu8(__m64 __m1, __m64 __m2) 171{ 172 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 173} 174 175inline __m64 __attribute__((__always_inline__)) _mm_subs_pu16(__m64 __m1, __m64 __m2) 176{ 177 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 178} 179 180inline __m64 __attribute__((__always_inline__)) _mm_madd_pi16(__m64 __m1, __m64 __m2) 181{ 182 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 183} 184 185inline __m64 __attribute__((__always_inline__)) _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 186{ 187 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 188} 189 190inline __m64 __attribute__((__always_inline__)) _mm_mullo_pi16(__m64 __m1, __m64 __m2) 191{ 192 return (__m64)((__v4hi)__m1 * (__v4hi)__m2); 193} 194 195inline __m64 __attribute__((__always_inline__)) _mm_sll_pi16(__m64 __m, __m64 __count) 196{ 197 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 198} 199 200inline __m64 __attribute__((__always_inline__)) _mm_slli_pi16(__m64 __m, int __count) 201{ 202 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 203} 204 205inline __m64 __attribute__((__always_inline__)) _mm_sll_pi32(__m64 __m, __m64 __count) 206{ 207 return 
(__m64)__builtin_ia32_pslld((__v2si)__m, __count); 208} 209 210inline __m64 __attribute__((__always_inline__)) _mm_slli_pi32(__m64 __m, int __count) 211{ 212 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 213} 214 215inline __m64 __attribute__((__always_inline__)) _mm_sll_pi64(__m64 __m, __m64 __count) 216{ 217 return __builtin_ia32_psllq(__m, __count); 218} 219 220inline __m64 __attribute__((__always_inline__)) _mm_slli_pi64(__m64 __m, int __count) 221{ 222 return __builtin_ia32_psllqi(__m, __count); 223} 224 225inline __m64 __attribute__((__always_inline__)) _mm_sra_pi16(__m64 __m, __m64 __count) 226{ 227 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 228} 229 230inline __m64 __attribute__((__always_inline__)) _mm_srai_pi16(__m64 __m, int __count) 231{ 232 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 233} 234 235inline __m64 __attribute__((__always_inline__)) _mm_sra_pi32(__m64 __m, __m64 __count) 236{ 237 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 238} 239 240inline __m64 __attribute__((__always_inline__)) _mm_srai_pi32(__m64 __m, int __count) 241{ 242 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 243} 244 245inline __m64 __attribute__((__always_inline__)) _mm_srl_pi16(__m64 __m, __m64 __count) 246{ 247 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 248} 249 250inline __m64 __attribute__((__always_inline__)) _mm_srli_pi16(__m64 __m, int __count) 251{ 252 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 253} 254 255inline __m64 __attribute__((__always_inline__)) _mm_srl_pi32(__m64 __m, __m64 __count) 256{ 257 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 258} 259 260inline __m64 __attribute__((__always_inline__)) _mm_srli_pi32(__m64 __m, int __count) 261{ 262 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 263} 264 265inline __m64 __attribute__((__always_inline__)) _mm_srl_pi64(__m64 __m, __m64 __count) 266{ 267 return (__m64)__builtin_ia32_psrlq(__m, 
__count); 268} 269 270inline __m64 __attribute__((__always_inline__)) _mm_srli_pi64(__m64 __m, int __count) 271{ 272 return __builtin_ia32_psrlqi(__m, __count); 273} 274 275inline __m64 __attribute__((__always_inline__)) _mm_and_si64(__m64 __m1, __m64 __m2) 276{ 277 return __m1 & __m2; 278} 279 280inline __m64 __attribute__((__always_inline__)) _mm_andnot_si64(__m64 __m1, __m64 __m2) 281{ 282 return ~__m1 & __m2; 283} 284 285inline __m64 __attribute__((__always_inline__)) _mm_or_si64(__m64 __m1, __m64 __m2) 286{ 287 return __m1 | __m2; 288} 289 290inline __m64 __attribute__((__always_inline__)) _mm_xor_si64(__m64 __m1, __m64 __m2) 291{ 292 return __m1 ^ __m2; 293} 294 295inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 296{ 297 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 298} 299 300inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 301{ 302 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 303} 304 305inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 306{ 307 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 308} 309 310inline __m64 __attribute__((__always_inline__)) _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 311{ 312 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 313} 314 315inline __m64 __attribute__((__always_inline__)) _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 316{ 317 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 318} 319 320inline __m64 __attribute__((__always_inline__)) _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 321{ 322 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 323} 324 325inline __m64 __attribute__((__always_inline__)) _mm_setzero_si64() 326{ 327 return (__m64){ 0LL }; 328} 329 330inline __m64 __attribute__((__always_inline__)) _mm_set_pi32(int __i1, int __i0) 331{ 332 return (__m64)(__v2si){ __i0, __i1 }; 333} 334 335inline __m64 
__attribute__((__always_inline__)) _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 336{ 337 return (__m64)(__v4hi){ __s0, __s1, __s2, __s3 }; 338} 339 340inline __m64 __attribute__((__always_inline__)) _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) 341{ 342 return (__m64)(__v8qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7 }; 343} 344 345inline __m64 __attribute__((__always_inline__)) _mm_set1_pi32(int __i) 346{ 347 return (__m64)(__v2si){ __i, __i }; 348} 349 350inline __m64 __attribute__((__always_inline__)) _mm_set1_pi16(short __s) 351{ 352 return (__m64)(__v4hi){ __s }; 353} 354 355inline __m64 __attribute__((__always_inline__)) _mm_set1_pi8(char __b) 356{ 357 return (__m64)(__v8qi){ __b }; 358} 359 360inline __m64 __attribute__((__always_inline__)) _mm_setr_pi32(int __i1, int __i0) 361{ 362 return (__m64)(__v2si){ __i1, __i0 }; 363} 364 365inline __m64 __attribute__((__always_inline__)) _mm_setr_pi16(short __s3, short __s2, short __s1, short __s0) 366{ 367 return (__m64)(__v4hi){ __s3, __s2, __s1, __s0 }; 368} 369 370inline __m64 __attribute__((__always_inline__)) _mm_setr_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) 371{ 372 return (__m64)(__v8qi){ __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0 }; 373} 374 375#endif /* __MMX__ */ 376 377#endif /* __MMINTRIN_H */ 378 379