1/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */
2/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007
3   Free Software Foundation, Inc.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING.  If not, write to
19   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20   Boston, MA 02110-1301, USA.  */
21
22/* As a special exception, if you include this header file into source
23   files compiled by GCC, this header file does not by itself cause
24   the resulting executable to be covered by the GNU General Public
25   License.  This exception does not however invalidate any other
26   reasons why the executable file might be covered by the GNU General
27   Public License.  */
28
29/* Implemented from the specification included in the Intel C++ Compiler
30   User Guide and Reference, version 9.0.  */
31
32#ifndef _XMMINTRIN_H_INCLUDED
33#define _XMMINTRIN_H_INCLUDED
34
35#ifndef __SSE__
36# error "SSE instruction set not enabled"
37#else
38
39/* We need type definitions from the MMX header file.  */
40#include <mmintrin.h>
41
42/* Get _mm_malloc () and _mm_free ().  */
43/* APPLE LOCAL begin xmmintrin.h for kernel 4123064 */
44#if __STDC_HOSTED__
45#include <mm_malloc.h>
46#endif
47/* APPLE LOCAL end xmmintrin.h for kernel 4123064 */
48
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
/* Public 128-bit SSE vector of four floats.  __may_alias__ licenses the
   bit-casts between vector types that the intrinsics below perform.  */
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));

/* Internal data types for implementing the intrinsics.  */
/* Same layout as __m128 but without __may_alias__; used only inside this
   header as the argument/result type of the ia32 builtins.  */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
55
#if defined(__clang__) && defined(WITH_SYNTAX_CHECK)
/* Workaround for "clang -fsyntax-only" happens to use this header, but may
 * choke on something not supported in clang.
 *
 * Declare every GCC ia32 builtin referenced below so the syntax check does
 * not flag them as undeclared.  Each builtin is declared exactly once; the
 * previous list carried several duplicate prototypes (addss x3, mulps x2,
 * mulss x2, subss x2), which were harmless but redundant.
 */
int __builtin_ia32_cvtss2si (__v4sf);
int __builtin_ia32_cvttss2si (__v4sf);
__m128 __builtin_ia32_addps (__v4sf, __v4sf);
__m128 __builtin_ia32_addss (__v4sf, __v4sf);
__m128 __builtin_ia32_andnps (__m128, __m128);
__m128 __builtin_ia32_andps (__m128, __m128);
__m128 __builtin_ia32_cmpeqps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpeqss (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpgeps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpgtps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpleps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpless (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpltps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpltss (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpneqps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpneqss (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpngeps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpngtps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpnleps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpnless (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpnltps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpnltss (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpordps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpordss (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpunordps (__v4sf, __v4sf);
__m128 __builtin_ia32_cmpunordss (__v4sf, __v4sf);
__m128 __builtin_ia32_cvtsi2ss (__v4sf, int);
__m128 __builtin_ia32_divps (__v4sf, __v4sf);
__m128 __builtin_ia32_divss (__v4sf, __v4sf);
__m128 __builtin_ia32_movss (__v4sf, __v4sf);
__m128 __builtin_ia32_mulps (__v4sf, __v4sf);
__m128 __builtin_ia32_mulss (__v4sf, __v4sf);
__m128 __builtin_ia32_orps (__m128, __m128);
__m128 __builtin_ia32_subps (__v4sf, __v4sf);
__m128 __builtin_ia32_subss (__v4sf, __v4sf);
__m128 __builtin_ia32_xorps (__m128, __m128);
__m128 __builtin_ia32_loadhps (__v4sf, const __v2si *);
__m128 __builtin_ia32_loadlps (__v4sf, const __v2si *);
__m128 __builtin_ia32_movhlps (__v4sf, __v4sf);
__m128 __builtin_ia32_movlhps (__v4sf, __v4sf);
__m128 __builtin_ia32_shufps (__v4sf, __v4sf, int const);
__m128 __builtin_ia32_unpckhps (__v4sf, __v4sf);
__m128 __builtin_ia32_unpcklps (__v4sf, __v4sf);
__m128 __builtin_ia32_loadups (float const *);
__m64 __builtin_ia32_vec_set_v4hi (__v4hi, int const, int const);
float __builtin_ia32_vec_ext_v4sf (__v4sf, const int);
int __builtin_ia32_vec_ext_v4hi (__v4hi, const int);
long long __builtin_ia32_cvtss2si64 (__v4sf);
long long __builtin_ia32_cvttss2si64 (__v4sf);
__m128 __builtin_ia32_cvtsi642ss (__v4sf, long long);
#endif
116
/* Create a selector for use with the SHUFPS instruction.  Each field
   selects one source element (0-3); fp0 lands in the low two bits.  */
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
 ((fp0) | ((fp1) << 2) | ((fp2) << 4) | ((fp3) << 6))
120
/* Constants for use with _mm_prefetch.  Each value selects a PREFETCH
   instruction variant (locality hint encoded in the opcode).  */
enum _mm_hint
{
  _MM_HINT_T0 = 3,	/* PREFETCHT0 */
  _MM_HINT_T1 = 2,	/* PREFETCHT1 */
  _MM_HINT_T2 = 1,	/* PREFETCHT2 */
  _MM_HINT_NTA = 0	/* PREFETCHNTA (non-temporal) */
};
129
/* Bits in the MXCSR.  */
/* Sticky exception-status flags (low six bits).  */
#define _MM_EXCEPT_MASK       0x003f
#define _MM_EXCEPT_INVALID    0x0001
#define _MM_EXCEPT_DENORM     0x0002
#define _MM_EXCEPT_DIV_ZERO   0x0004
#define _MM_EXCEPT_OVERFLOW   0x0008
#define _MM_EXCEPT_UNDERFLOW  0x0010
#define _MM_EXCEPT_INEXACT    0x0020

/* Exception mask bits; per the Intel SDM a set bit suppresses the
   corresponding SIMD floating-point exception.  */
#define _MM_MASK_MASK         0x1f80
#define _MM_MASK_INVALID      0x0080
#define _MM_MASK_DENORM       0x0100
#define _MM_MASK_DIV_ZERO     0x0200
#define _MM_MASK_OVERFLOW     0x0400
#define _MM_MASK_UNDERFLOW    0x0800
#define _MM_MASK_INEXACT      0x1000

/* Rounding-control field (two bits).  */
#define _MM_ROUND_MASK        0x6000
#define _MM_ROUND_NEAREST     0x0000
#define _MM_ROUND_DOWN        0x2000
#define _MM_ROUND_UP          0x4000
#define _MM_ROUND_TOWARD_ZERO 0x6000

/* Flush-to-zero control bit.  */
#define _MM_FLUSH_ZERO_MASK   0x8000
#define _MM_FLUSH_ZERO_ON     0x8000
#define _MM_FLUSH_ZERO_OFF    0x0000
156
/* APPLE LOCAL begin nodebug inline 4152603 */
/* Redefine the attribute name itself so that every
   __attribute__((__always_inline__)) below expands to
   __attribute__((__always_inline__, __nodebug__)) — i.e. this header's
   intrinsics are force-inlined AND omitted from debug info.  */
#define __always_inline__ __always_inline__, __nodebug__
/* APPLE LOCAL end nodebug inline 4152603 */

/* APPLE LOCAL begin radar 5618945 */
/* Pick the linkage for the intrinsics below: plain GNU89 "extern inline"
   semantics under -fgnu89-inline would differ, so with C99 inline
   semantics use bare __inline, otherwise static __inline.  */
#undef __STATIC_INLINE
#ifdef __GNUC_STDC_INLINE__
#define __STATIC_INLINE __inline
#else
#define __STATIC_INLINE static __inline
#endif
/* APPLE LOCAL end radar 5618945 */
169
/* Create a vector of zeros.  */
/* APPLE LOCAL begin radar 4152603 */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_setzero_ps (void)
{
  /* Compound literal of vector type; __extension__ silences pedantic
     warnings about the GNU vector-literal syntax.  */
  return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
}
179
/* Perform the respective operation on the lower SPFP (single-precision
   floating-point) values of A and B; the upper three SPFP values are
   passed through from A.  Each wrapper maps 1:1 onto an SSE scalar
   instruction (ADDSS, SUBSS, ...).  */

/* Low element: A + B (ADDSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_add_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
}

/* Low element: A - B (SUBSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_sub_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
}

/* Low element: A * B (MULSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_mul_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
}

/* Low element: A / B (DIVSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_div_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
}

/* Low element: sqrt(A) (SQRTSS); single operand, upper lanes from A.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_sqrt_ss (__m128 __A)
{
  return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
}

/* Low element: hardware approximate reciprocal of A (RCPSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_rcp_ss (__m128 __A)
{
  return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
}

/* Low element: hardware approximate 1/sqrt(A) (RSQRTSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_rsqrt_ss (__m128 __A)
{
  return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
}

/* Low element: min(A, B) (MINSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_min_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
}

/* Low element: max(A, B) (MAXSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_max_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
}
255
/* Perform the respective operation on the four SPFP values in A and B.
   Packed (all-lane) counterparts of the _ss wrappers above; each maps
   1:1 onto an SSE packed instruction (ADDPS, SUBPS, ...).  */

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_add_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_sub_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_mul_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_div_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_sqrt_ps (__m128 __A)
{
  return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
}

/* Hardware approximate reciprocal, all four lanes (RCPPS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_rcp_ps (__m128 __A)
{
  return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
}

/* Hardware approximate 1/sqrt, all four lanes (RSQRTPS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_rsqrt_ps (__m128 __A)
{
  return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_min_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_max_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
}
329
/* Perform logical bit-wise operations on 128-bit values.  These builtins
   take __m128 directly (no casts needed, unlike the arithmetic group).  */

/* A & B (ANDPS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_and_ps (__m128 __A, __m128 __B)
{
  return __builtin_ia32_andps (__A, __B);
}

/* ~A & B (ANDNPS) — note the FIRST operand is complemented.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_andnot_ps (__m128 __A, __m128 __B)
{
  return __builtin_ia32_andnps (__A, __B);
}

/* A | B (ORPS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_or_ps (__m128 __A, __m128 __B)
{
  return __builtin_ia32_orps (__A, __B);
}

/* A ^ B (XORPS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_xor_ps (__m128 __A, __m128 __B)
{
  return __builtin_ia32_xorps (__A, __B);
}
363
/* Perform a comparison on the lower SPFP values of A and B.  If the
   comparison is true, place a mask of all ones in the result, otherwise a
   mask of zeros.  The upper three SPFP values are passed through from A.
   These three have direct CMPxxSS encodings.  */

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpeq_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmplt_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmple_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
}
391
392/* APPLE LOCAL begin radar 5618945 */
393__STATIC_INLINE __m128 __attribute__((__always_inline__))
394/* APPLE LOCAL end radar 5618945 */
395_mm_cmpgt_ss (__m128 __A, __m128 __B)
396{
397  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
398					(__v4sf)
399					__builtin_ia32_cmpltss ((__v4sf) __B,
400								(__v4sf)
401								__A));
402}
403
404/* APPLE LOCAL begin radar 5618945 */
405__STATIC_INLINE __m128 __attribute__((__always_inline__))
406/* APPLE LOCAL end radar 5618945 */
407_mm_cmpge_ss (__m128 __A, __m128 __B)
408{
409  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
410					(__v4sf)
411					__builtin_ia32_cmpless ((__v4sf) __B,
412								(__v4sf)
413								__A));
414}
415
/* Negated low-element comparisons with direct CMPxxSS encodings.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpneq_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpnlt_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpnle_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
}
439
440/* APPLE LOCAL begin radar 5618945 */
441__STATIC_INLINE __m128 __attribute__((__always_inline__))
442/* APPLE LOCAL end radar 5618945 */
443_mm_cmpngt_ss (__m128 __A, __m128 __B)
444{
445  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
446					(__v4sf)
447					__builtin_ia32_cmpnltss ((__v4sf) __B,
448								 (__v4sf)
449								 __A));
450}
451
452/* APPLE LOCAL begin radar 5618945 */
453__STATIC_INLINE __m128 __attribute__((__always_inline__))
454/* APPLE LOCAL end radar 5618945 */
455_mm_cmpnge_ss (__m128 __A, __m128 __B)
456{
457  return (__m128) __builtin_ia32_movss ((__v4sf) __A,
458					(__v4sf)
459					__builtin_ia32_cmpnless ((__v4sf) __B,
460								 (__v4sf)
461								 __A));
462}
463
/* Low-element ordered/unordered tests (CMPORDSS / CMPUNORDSS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpord_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpunord_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
}
479
/* Perform a comparison on the four SPFP values of A and B.  For each
   element, if the comparison is true, place a mask of all ones in the
   result, otherwise a mask of zeros.  Unlike the _ss group, every packed
   predicate (including gt/ge/ngt/nge) has its own builtin, so no
   operand-swapping is needed here.  */

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpeq_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmplt_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmple_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpgt_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpge_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpneq_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpnlt_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpnle_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpngt_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpnge_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpord_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cmpunord_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
}
579
/* Compare the lower SPFP values of A and B and return 1 if true
   and 0 if false.  These map onto COMISS-based builtins.  */

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_comieq_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_comilt_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_comile_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_comigt_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_comige_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_comineq_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
}
630
/* Unordered variants of the scalar compare-and-return-int group (UCOMISS
   rather than COMISS; per the Intel SDM the two differ only in exception
   signaling on quiet NaN operands).  Return 1 if true, 0 if false.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_ucomieq_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_ucomilt_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_ucomile_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_ucomigt_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_ucomige_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_ucomineq_ss (__m128 __A, __m128 __B)
{
  return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
}
678
/* Convert the lower SPFP value to a 32-bit integer according to the current
   rounding mode (CVTSS2SI).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtss_si32 (__m128 __A)
{
  return __builtin_ia32_cvtss2si ((__v4sf) __A);
}

/* Older-style alias for _mm_cvtss_si32.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvt_ss2si (__m128 __A)
{
  return _mm_cvtss_si32 (__A);
}
696
#ifdef __x86_64__
/* Convert the lower SPFP value to a 64-bit integer according to the
   current rounding mode (CVTSS2SI with 64-bit destination; only
   available in 64-bit mode).  */

/* Intel intrinsic.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE long long __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtss_si64 (__m128 __A)
{
  return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
}

/* Microsoft intrinsic.  Same operation under the _si64x name.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE long long __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtss_si64x (__m128 __A)
{
  return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
}
#endif
719
/* Convert the two lower SPFP values to 32-bit integers according to the
   current rounding mode.  Return the integers in packed (MMX __m64)
   form (CVTPS2PI).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtps_pi32 (__m128 __A)
{
  return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
}

/* Older-style alias for _mm_cvtps_pi32.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvt_ps2pi (__m128 __A)
{
  return _mm_cvtps_pi32 (__A);
}
737
/* Truncate (round toward zero, regardless of MXCSR rounding mode) the
   lower SPFP value to a 32-bit integer (CVTTSS2SI).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvttss_si32 (__m128 __A)
{
  return __builtin_ia32_cvttss2si ((__v4sf) __A);
}

/* Older-style alias for _mm_cvttss_si32.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtt_ss2si (__m128 __A)
{
  return _mm_cvttss_si32 (__A);
}
754
#ifdef __x86_64__
/* Truncate the lower SPFP value to a 64-bit integer (CVTTSS2SI, 64-bit
   destination; 64-bit mode only).  */

/* Intel intrinsic.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE long long __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvttss_si64 (__m128 __A)
{
  return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
}

/* Microsoft intrinsic.  Same operation under the _si64x name.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE long long __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvttss_si64x (__m128 __A)
{
  return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
}
#endif
776
/* Truncate the two lower SPFP values to 32-bit integers.  Return the
   integers in packed (MMX __m64) form (CVTTPS2PI).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvttps_pi32 (__m128 __A)
{
  return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
}

/* Older-style alias for _mm_cvttps_pi32.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtt_ps2pi (__m128 __A)
{
  return _mm_cvttps_pi32 (__A);
}
794
/* Convert B to a SPFP value and insert it as element zero in A; the other
   three elements come from A (CVTSI2SS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtsi32_ss (__m128 __A, int __B)
{
  return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
}

/* Older-style alias for _mm_cvtsi32_ss.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvt_si2ss (__m128 __A, int __B)
{
  return _mm_cvtsi32_ss (__A, __B);
}
811
#ifdef __x86_64__
/* Convert the 64-bit integer B to a SPFP value and insert it as element
   zero in A (CVTSI2SS with 64-bit source; 64-bit mode only).  */

/* Intel intrinsic.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtsi64_ss (__m128 __A, long long __B)
{
  return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
}

/* Microsoft intrinsic.  Same operation under the _si64x name.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtsi64x_ss (__m128 __A, long long __B)
{
  return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
}
#endif
833
/* Convert the two 32-bit values in B to SPFP form and insert them
   as the two lower elements in A; upper two elements come from A
   (CVTPI2PS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtpi32_ps (__m128 __A, __m64 __B)
{
  return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
}

/* Older-style alias for _mm_cvtpi32_ps.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvt_pi2ps (__m128 __A, __m64 __B)
{
  return _mm_cvtpi32_ps (__A, __B);
}
851
/* Convert the four signed 16-bit values in A to SPFP form.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtpi16_ps (__m64 __A)
{
  __v4hi __sign;
  __v2si __hisi, __losi;
  __v4sf __r;

  /* This comparison against zero gives us a mask that can be used to
     fill in the missing sign bits in the unpack operations below, so
     that we get signed values after unpacking.  */
  __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);

  /* Convert the four words to doublewords.  */
  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);

  /* Convert the doublewords to floating point two at a time.  The high
     pair is converted first, then shifted into the upper half of the
     result with MOVLHPS so the low pair can be converted into the lower
     half — order matters here.  */
  __r = (__v4sf) _mm_setzero_ps ();
  __r = __builtin_ia32_cvtpi2ps (__r, __hisi);
  __r = __builtin_ia32_movlhps (__r, __r);
  __r = __builtin_ia32_cvtpi2ps (__r, __losi);

  return (__m128) __r;
}
879
/* Convert the four unsigned 16-bit values in A to SPFP form.  Same shape
   as _mm_cvtpi16_ps, but zero-extends (unpacks against zero) instead of
   sign-extending.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtpu16_ps (__m64 __A)
{
  __v2si __hisi, __losi;
  __v4sf __r;

  /* Convert the four words to doublewords (zero-extended).  */
  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);

  /* Convert the doublewords to floating point two at a time: high pair
     first, MOVLHPS it into the upper half, then the low pair.  */
  __r = (__v4sf) _mm_setzero_ps ();
  __r = __builtin_ia32_cvtpi2ps (__r, __hisi);
  __r = __builtin_ia32_movlhps (__r, __r);
  __r = __builtin_ia32_cvtpi2ps (__r, __losi);

  return (__m128) __r;
}
901
/* Convert the low four signed 8-bit values in A to SPFP form: sign-extend
   bytes to words, then reuse the word-to-float conversion.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtpi8_ps (__m64 __A)
{
  __v8qi __sign;

  /* This comparison against zero gives us a mask that can be used to
     fill in the missing sign bits in the unpack operations below, so
     that we get signed values after unpacking.  */
  __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);

  /* Convert the four low bytes to words.  */
  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);

  return _mm_cvtpi16_ps(__A);
}
920
/* Convert the low four unsigned 8-bit values in A to SPFP form:
   zero-extend bytes to words (unpack against zero), then reuse the
   unsigned word-to-float conversion.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtpu8_ps(__m64 __A)
{
  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
  return _mm_cvtpu16_ps(__A);
}
930
/* Convert the four signed 32-bit values in A and B to SPFP form: A's pair
   becomes the low half of the result, B's pair the high half (via
   MOVLHPS).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
{
  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
  __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
  __v4sf __sfb = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__B);
  return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
}
942
/* Convert the four SPFP values in A to four signed 16-bit integers.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtps_pi16(__m128 __A)
{
  /* cvtps2pi converts only the low two SPFP values, so pull the upper
     pair down with movhlps, convert both pairs, and pack the two
     doubleword pairs to words with signed saturation.  */
  __v4sf __hisf = (__v4sf)__A;
  __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
  __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
  __v2si __losi = __builtin_ia32_cvtps2pi (__losf);
  return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
}
955
/* Convert the four SPFP values in A to four signed 8-bit integers.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtps_pi8(__m128 __A)
{
  /* Convert to words first, then pack to bytes with signed
     saturation; the upper four result bytes are zero.  */
  __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
  return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
}
965
/* Selects four specific SPFP values from A and B based on MASK.  */
#if 0
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_shuffle_ps (__m128 __A, __m128 __B, int __mask)
{
  return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
}
#else
/* Defined as a macro (rather than the inline above) so that MASK can
   be checked as a compile-time immediate; build MASK with
   _MM_SHUFFLE.  */
#define _mm_shuffle_ps(A, B, MASK) \
 ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK)))
#endif
979
980
/* Selects and interleaves the upper two SPFP values from A and B.  */
/* Result is [A2 B2 A3 B3] (unpckhps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_unpackhi_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
}

/* Selects and interleaves the lower two SPFP values from A and B.  */
/* Result is [A0 B0 A1 B1] (unpcklps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_unpacklo_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
}
998
/* Sets the upper two SPFP values with 64-bits of data loaded from P;
   the lower two values are passed through from A.  P need not be
   aligned (movhps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_loadh_pi (__m128 __A, __m64 const *__P)
{
  return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P);
}

/* Stores the upper two SPFP values of A into P.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_storeh_pi (__m64 *__P, __m128 __A)
{
  __builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A);
}
1017
/* Moves the upper two values of B into the lower two values of A.  */
/* Result is [B2 B3 A2 A3] (movhlps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_movehl_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
}

/* Moves the lower two values of B into the upper two values of A.  */
/* Result is [A0 A1 B0 B1] (movlhps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_movelh_ps (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
}
1035
/* Sets the lower two SPFP values with 64-bits of data loaded from P;
   the upper two values are passed through from A.  P need not be
   aligned (movlps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_loadl_pi (__m128 __A, __m64 const *__P)
{
  return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P);
}

/* Stores the lower two SPFP values of A into P.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_storel_pi (__m64 *__P, __m128 __A)
{
  __builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A);
}
1054
/* Creates a 4-bit mask from the most significant bits of the SPFP
   values (movmskps); bit i of the result is the sign bit of lane i.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_movemask_ps (__m128 __A)
{
  return __builtin_ia32_movmskps ((__v4sf)__A);
}
1063
/* Return the contents of the control register.  */
/* Reads the MXCSR register via stmxcsr.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE unsigned int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_getcsr (void)
{
  return __builtin_ia32_stmxcsr ();
}

/* Read exception bits from the control register.  */
/* Each of the _MM_GET_* helpers below masks out one MXCSR field;
   compare the result against the corresponding _MM_* constants.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE unsigned int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_MM_GET_EXCEPTION_STATE (void)
{
  return _mm_getcsr() & _MM_EXCEPT_MASK;
}

/* Read the exception-mask (suppression) bits.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE unsigned int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_MM_GET_EXCEPTION_MASK (void)
{
  return _mm_getcsr() & _MM_MASK_MASK;
}

/* Read the rounding-mode bits.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE unsigned int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_MM_GET_ROUNDING_MODE (void)
{
  return _mm_getcsr() & _MM_ROUND_MASK;
}

/* Read the flush-to-zero bit.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE unsigned int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_MM_GET_FLUSH_ZERO_MODE (void)
{
  return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
}
1105
/* Set the control register to I.  */
/* Writes the MXCSR register via ldmxcsr.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_setcsr (unsigned int __I)
{
  __builtin_ia32_ldmxcsr (__I);
}

/* Set exception bits in the control register.  */
/* Each _MM_SET_* helper below is a read-modify-write of one MXCSR
   field: clear the field, then OR in the caller's (pre-shifted)
   value, leaving all other fields untouched.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_MM_SET_EXCEPTION_STATE(unsigned int __mask)
{
  _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
}

/* Set the exception-mask (suppression) bits.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_MM_SET_EXCEPTION_MASK (unsigned int __mask)
{
  _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
}

/* Set the rounding mode (use the _MM_ROUND_* constants).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_MM_SET_ROUNDING_MODE (unsigned int __mode)
{
  _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
}

/* Set the flush-to-zero mode (use the _MM_FLUSH_ZERO_* constants).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
{
  _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
}
1147
/* Create a vector with element 0 as F and the rest zero.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_set_ss (float __F)
{
  return __extension__ (__m128)(__v4sf){ __F, 0, 0, 0 };
}

/* Create a vector with all four elements equal to F.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_set1_ps (float __F)
{
  return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
}

/* Alternate (older Intel) name for _mm_set1_ps.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_set_ps1 (float __F)
{
  return _mm_set1_ps (__F);
}
1173
/* Create a vector with element 0 as *P and the rest zero.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_load_ss (float const *__P)
{
  return _mm_set_ss (*__P);
}

/* Create a vector with all four elements equal to *P.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_load1_ps (float const *__P)
{
  return _mm_set1_ps (*__P);
}

/* Alternate (older Intel) name for _mm_load1_ps.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_load_ps1 (float const *__P)
{
  return _mm_load1_ps (__P);
}
1199
/* Load four SPFP values from P.  The address must be 16-byte aligned.  */
/* Plain aligned vector load; an unaligned P here is undefined
   behavior (faults at runtime with movaps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_load_ps (float const *__P)
{
  return (__m128) *(__v4sf *)__P;
}

/* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_loadu_ps (float const *__P)
{
  return (__m128) __builtin_ia32_loadups (__P);
}

/* Load four SPFP values in reverse order.  The address must be aligned.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_loadr_ps (float const *__P)
{
  /* _MM_SHUFFLE (0,1,2,3) selects the lanes in reverse order.  */
  __v4sf __tmp = *(__v4sf *)__P;
  return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
}
1227
/* Create the vector [Z Y X W].  */
/* Arguments are given highest-lane first, so __W lands in element 0.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
{
  return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
}

/* Create the vector [W X Y Z].  */
/* "Reversed" variant: arguments are given lowest-lane first.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_setr_ps (float __Z, float __Y, float __X, float __W)
{
  return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
}
1245
/* Stores the lower SPFP value.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_store_ss (float *__P, __m128 __A)
{
  *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
}

/* Return the lower SPFP value of A as a scalar float.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE float __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_cvtss_f32 (__m128 __A)
{
  return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
}
1262
/* Store four SPFP values.  The address must be 16-byte aligned.  */
/* Plain aligned vector store; an unaligned P here is undefined
   behavior (faults at runtime with movaps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_store_ps (float *__P, __m128 __A)
{
  *(__v4sf *)__P = (__v4sf)__A;
}

/* Store four SPFP values.  The address need not be 16-byte aligned.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_storeu_ps (float *__P, __m128 __A)
{
  __builtin_ia32_storeups (__P, (__v4sf)__A);
}
1280
/* Store the lower SPFP value across four words.  */
/* Broadcast lane 0 with a self-shuffle, then store unaligned.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_store1_ps (float *__P, __m128 __A)
{
  __v4sf __va = (__v4sf)__A;
  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
  _mm_storeu_ps (__P, __tmp);
}

/* Alternate (older Intel) name for _mm_store1_ps.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_store_ps1 (float *__P, __m128 __A)
{
  _mm_store1_ps (__P, __A);
}

/* Store four SPFP values in reverse order.  The address must be aligned.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_storer_ps (float *__P, __m128 __A)
{
  /* _MM_SHUFFLE (0,1,2,3) reverses the lane order before the store.  */
  __v4sf __va = (__v4sf)__A;
  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
  _mm_store_ps (__P, __tmp);
}
1310
/* Sets the low SPFP value of A from the low value of B.  */
/* Result is [B0 A1 A2 A3] (movss).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m128 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_move_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
}
1319
/* Extracts one of the four words of A.  The selector N must be immediate.  */
#if 0
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_extract_pi16 (__m64 const __A, int const __N)
{
  return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pextrw (__m64 const __A, int const __N)
{
  return _mm_extract_pi16 (__A, __N);
}
#else
/* Macro form so that N reaches the builtin as a compile-time
   immediate; _m_pextrw is the classic MMX-intrinsic spelling.  */
#define _mm_extract_pi16(A, N)	__builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N))
#define _m_pextrw(A, N)		_mm_extract_pi16((A), (N))
#endif
1341
/* Inserts word D into one of four words of A.  The selector N must be
   immediate.  */
#if 0
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
{
  return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pinsrw (__m64 const __A, int const __D, int const __N)
{
  return _mm_insert_pi16 (__A, __D, __N);
}
#else
/* Macro form so that N reaches the builtin as a compile-time
   immediate; _m_pinsrw is the classic MMX-intrinsic spelling.  */
#define _mm_insert_pi16(A, D, N) \
  ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N)))
#define _m_pinsrw(A, D, N)	 _mm_insert_pi16((A), (D), (N))
#endif
1365
/* Compute the element-wise maximum of signed 16-bit values.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_max_pi16 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
}

/* Classic MMX-intrinsic name for _mm_max_pi16.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pmaxsw (__m64 __A, __m64 __B)
{
  return _mm_max_pi16 (__A, __B);
}

/* Compute the element-wise maximum of unsigned 8-bit values.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_max_pu8 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
}

/* Classic MMX-intrinsic name for _mm_max_pu8.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pmaxub (__m64 __A, __m64 __B)
{
  return _mm_max_pu8 (__A, __B);
}
1399
/* Compute the element-wise minimum of signed 16-bit values.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_min_pi16 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
}

/* Classic MMX-intrinsic name for _mm_min_pi16.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pminsw (__m64 __A, __m64 __B)
{
  return _mm_min_pi16 (__A, __B);
}

/* Compute the element-wise minimum of unsigned 8-bit values.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_min_pu8 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
}

/* Classic MMX-intrinsic name for _mm_min_pu8.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pminub (__m64 __A, __m64 __B)
{
  return _mm_min_pu8 (__A, __B);
}
1433
/* Create an 8-bit mask of the signs of 8-bit values.  */
/* Bit i of the result is the sign bit of byte i (pmovmskb).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_movemask_pi8 (__m64 __A)
{
  return __builtin_ia32_pmovmskb ((__v8qi)__A);
}

/* Classic MMX-intrinsic name for _mm_movemask_pi8.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE int __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pmovmskb (__m64 __A)
{
  return _mm_movemask_pi8 (__A);
}
1450
/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
   in B and produce the high 16 bits of the 32-bit results.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_mulhi_pu16 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
}

/* Classic MMX-intrinsic name for _mm_mulhi_pu16.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pmulhuw (__m64 __A, __m64 __B)
{
  return _mm_mulhi_pu16 (__A, __B);
}
1468
/* Return a combination of the four 16-bit values in A.  The selector
   must be an immediate.  */
#if 0
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_shuffle_pi16 (__m64 __A, int __N)
{
  return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
}

/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pshufw (__m64 __A, int __N)
{
  return _mm_shuffle_pi16 (__A, __N);
}
#else
/* Macro form so that N reaches the builtin as a compile-time
   immediate; build N with _MM_SHUFFLE.  */
#define _mm_shuffle_pi16(A, N) \
  ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N)))
#define _m_pshufw(A, N)		_mm_shuffle_pi16 ((A), (N))
#endif
1492
/* Conditionally store byte elements of A into P.  The high bit of each
   byte in the selector N determines whether the corresponding byte from
   A is stored.  */
/* Uses maskmovq, which is a non-temporal (cache-bypassing) store.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
{
  __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
}

/* Classic MMX-intrinsic name for _mm_maskmove_si64.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_maskmovq (__m64 __A, __m64 __N, char *__P)
{
  _mm_maskmove_si64 (__A, __N, __P);
}
1511
/* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_avg_pu8 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
}

/* Classic MMX-intrinsic name for _mm_avg_pu8.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pavgb (__m64 __A, __m64 __B)
{
  return _mm_avg_pu8 (__A, __B);
}

/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_avg_pu16 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
}

/* Classic MMX-intrinsic name for _mm_avg_pu16.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_pavgw (__m64 __A, __m64 __B)
{
  return _mm_avg_pu16 (__A, __B);
}
1545
/* Compute the sum of the absolute differences of the unsigned 8-bit
   values in A and B.  Return the value in the lower 16-bit word; the
   upper words are cleared.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_sad_pu8 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
}

/* Classic MMX-intrinsic name for _mm_sad_pu8.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE __m64 __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_m_psadbw (__m64 __A, __m64 __B)
{
  return _mm_sad_pu8 (__A, __B);
}
1564
/* Loads one cache line from address P to a location "closer" to the
   processor.  The selector I specifies the type of prefetch operation.  */
#if 0
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_prefetch (void *__P, enum _mm_hint __I)
{
  __builtin_prefetch (__P, 0, __I);
}
#else
/* Macro form so that I reaches __builtin_prefetch as a compile-time
   constant; the middle argument 0 means prefetch-for-read.  */
#define _mm_prefetch(P, I) \
  __builtin_prefetch ((P), 0, (I))
#endif
1579
/* Stores the data in A to the address P without polluting the caches.  */
/* Non-temporal 64-bit store (movntq).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_stream_pi (__m64 *__P, __m64 __A)
{
  /* APPLE LOCAL 4656532 use V1DImode for _m64 */
  __builtin_ia32_movntq (__P, __A);
}

/* Likewise.  The address must be 16-byte aligned.  */
/* Non-temporal 128-bit SPFP store (movntps).  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_stream_ps (float *__P, __m128 __A)
{
  __builtin_ia32_movntps (__P, (__v4sf)__A);
}
1598
/* Guarantees that every preceding store is globally visible before
   any subsequent store.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_sfence (void)
{
  __builtin_ia32_sfence ();
}

/* The execution of the next instruction is delayed by an implementation
   specific amount of time.  The instruction does not modify the
   architectural state.  */
/* "rep; nop" encodes the PAUSE instruction, which also works as a
   plain NOP on pre-SSE2 processors.  */
/* APPLE LOCAL begin radar 5618945 */
__STATIC_INLINE void __attribute__((__always_inline__))
/* APPLE LOCAL end radar 5618945 */
_mm_pause (void)
{
  __asm__ __volatile__ ("rep; nop" : : );
}
1619/* APPLE LOCAL end radar 4152603 */
1620
/* Transpose the 4x4 matrix composed of row[0-3].  */
/* Classic two-stage transpose: unpcklps/unpckhps interleave pairs of
   rows into column pairs, then movlhps/movhlps recombine the halves
   into the transposed rows.  Each row argument is both read and
   written, so pass lvalues only.  */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)			\
do {									\
  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);	\
  __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1);			\
  __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3);			\
  __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1);			\
  __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3);			\
  (row0) = __builtin_ia32_movlhps (__t0, __t1);				\
  (row1) = __builtin_ia32_movhlps (__t1, __t0);				\
  (row2) = __builtin_ia32_movlhps (__t2, __t3);				\
  (row3) = __builtin_ia32_movhlps (__t3, __t2);				\
} while (0)
1634
1635/* APPLE LOCAL begin nodebug inline 4152603 */
1636#undef __always_inline__
1637/* APPLE LOCAL end nodebug inline 4152603 */
1638
1639/* For backward source compatibility.  */
1640#include <emmintrin.h>
1641
1642#endif /* __SSE__ */
1643#endif /* _XMMINTRIN_H_INCLUDED */
1644