// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s

#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>

// _mm_rsqrt_ss: expects a call to the rsqrt.ss intrinsic followed by
// extractelement instructions for lanes 0..3 of a <4 x float>.
__m128 test_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: define {{.*}} @test_rsqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  return _mm_rsqrt_ss(x);
}

// _mm_rcp_ss: expects a call to the rcp.ss intrinsic followed by
// extractelement instructions for lanes 0..3 of a <4 x float>.
__m128 test_rcp_ss(__m128 x) {
  // CHECK-LABEL: define {{.*}} @test_rcp_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  return _mm_rcp_ss(x);
}

// _mm_sqrt_ss: expects a call to the sqrt.ss intrinsic followed by
// extractelement instructions for lanes 0..3 of a <4 x float>.
__m128 test_sqrt_ss(__m128 x) {
  // CHECK-LABEL: define {{.*}} @test_sqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  return _mm_sqrt_ss(x);
}

// _mm_loadl_pi: expects an alignment-1 <2 x float> load, a widening shuffle,
// and a final shuffle with mask <4, 5, 2, 3>.
__m128 test_loadl_pi(__m128 x, void *y) {
  // CHECK-LABEL: define {{.*}} @test_loadl_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x, y);
}

// _mm_loadh_pi: expects an alignment-1 <2 x float> load, a widening shuffle,
// and a final shuffle with mask <0, 1, 4, 5>.
__m128 test_loadh_pi(__m128 x, void *y) {
  // CHECK-LABEL: define {{.*}} @test_loadh_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x, y);
}

// _mm_load_ss: expects a scalar float load with alignment 1.
__m128 test_load_ss(void *y) {
  // CHECK-LABEL: define {{.*}} @test_load_ss
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  return _mm_load_ss(y);
}

// _mm_load1_ps: expects a scalar float load with alignment 1.
__m128 test_load1_ps(void *y) {
  // CHECK-LABEL: define {{.*}} @test_load1_ps
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  return _mm_load1_ps(y);
}

// _mm_store_ss: expects a scalar float store with alignment 1.
void test_store_ss(__m128 x, void *y) {
  // CHECK-LABEL: define void @test_store_ss
  // CHECK: store {{.*}} float* {{.*}}, align 1{{$}}
  _mm_store_ss(y, x);
}

// _mm_load1_pd: expects a scalar double load with alignment 1.
// The x parameter is not used by the body.
__m128d test_load1_pd(__m128 x, void *y) {
  // CHECK-LABEL: define {{.*}} @test_load1_pd
  // CHECK: load double, double* {{.*}}, align 1{{$}}
  return _mm_load1_pd(y);
}

// _mm_loadr_pd: expects a full <2 x double> load with alignment 16.
// The x parameter is not used by the body.
__m128d test_loadr_pd(__m128 x, void *y) {
  // CHECK-LABEL: define {{.*}} @test_loadr_pd
  // CHECK: load <2 x double>, <2 x double>* {{.*}}, align 16{{$}}
  return _mm_loadr_pd(y);
}

// _mm_load_sd: expects a scalar double load with alignment 1.
__m128d test_load_sd(void *y) {
  // CHECK-LABEL: define {{.*}} @test_load_sd
  // CHECK: load double, double* {{.*}}, align 1{{$}}
  return _mm_load_sd(y);
}

// _mm_loadh_pd: expects a scalar double load with alignment 1.
__m128d test_loadh_pd(__m128d x, void *y) {
  // CHECK-LABEL: define {{.*}} @test_loadh_pd
  // CHECK: load double, double* {{.*}}, align 1{{$}}
  return _mm_loadh_pd(x, y);
}

// _mm_loadl_pd: expects a scalar double load with alignment 1.
__m128d test_loadl_pd(__m128d x, void *y) {
  // CHECK-LABEL: define {{.*}} @test_loadl_pd
  // CHECK: load double, double* {{.*}}, align 1{{$}}
  return _mm_loadl_pd(x, y);
}

// _mm_store_sd: expects a scalar double store with alignment 1.
void test_store_sd(__m128d x, void *y) {
  // CHECK-LABEL: define void @test_store_sd
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  _mm_store_sd(y, x);
}

// _mm_store1_pd: expects two scalar double stores, each with alignment 1.
void test_store1_pd(__m128d x, void *y) {
  // CHECK-LABEL: define void @test_store1_pd
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  _mm_store1_pd(y, x);
}

// _mm_storer_pd: expects a full <2 x double> store with alignment 16.
void test_storer_pd(__m128d x, void *y) {
  // CHECK-LABEL: define void @test_storer_pd
  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
  _mm_storer_pd(y, x);
}

// _mm_storeh_pd: expects a scalar double store with alignment 1.
void test_storeh_pd(__m128d x, void *y) {
  // CHECK-LABEL: define void @test_storeh_pd
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  _mm_storeh_pd(y, x);
}

// _mm_storel_pd: expects a scalar double store with alignment 1.
void test_storel_pd(__m128d x, void *y) {
  // CHECK-LABEL: define void @test_storel_pd
  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
  _mm_storel_pd(y, x);
}

// _mm_loadl_epi64: expects a scalar i64 load with alignment 1.
__m128i test_loadl_epi64(void *y) {
  // CHECK-LABEL: define {{.*}} @test_loadl_epi64
  // CHECK: load i64, i64* {{.*}}, align 1{{$}}
  return _mm_loadl_epi64(y);
}

// _mm_storel_epi64: expects a scalar i64 store with alignment 1.
void test_storel_epi64(__m128i x, void *y) {
  // CHECK-LABEL: define void @test_storel_epi64
  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
  _mm_storel_epi64(y, x);
}

// _mm_stream_si32: expects an i32 store with alignment 1 carrying
// !nontemporal metadata.
void test_stream_si32(int x, void *y) {
  // CHECK-LABEL: define void @test_stream_si32
  // CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal
  _mm_stream_si32(y, x);
}

// _mm_stream_si64: expects an i64 store with alignment 1 carrying
// !nontemporal metadata.
void test_stream_si64(long long x, void *y) {
  // CHECK-LABEL: define void @test_stream_si64
  // CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal
  _mm_stream_si64(y, x);
}

// _mm_stream_si128: expects a <2 x i64> store with alignment 16 carrying
// !nontemporal metadata.
void test_stream_si128(__m128i x, void *y) {
  // CHECK-LABEL: define void @test_stream_si128
  // CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal
  _mm_stream_si128(y, x);
}

// _mm_extract_epi16 called with lane selector 8, which is outside 0..7: the
// patterns expect the selector to be masked with 7 before it feeds the
// <8 x i16> extractelement. The extracted value is intentionally discarded.
void test_extract_epi16(__m128i __a) {
  // CHECK-LABEL: define void @test_extract_epi16
  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
  // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
  _mm_extract_epi16(__a, 8);
}

// _mm_cmpeq_ss: expects the scalar-float compare intrinsic with predicate
// immediate 0.
__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

// _mm_cmplt_ss: expects the scalar-float compare intrinsic with predicate
// immediate 1.
__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

// _mm_cmple_ss: expects the scalar-float compare intrinsic with predicate
// immediate 2.
__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

// _mm_cmpunord_ss: expects the scalar-float compare intrinsic with predicate
// immediate 3.
__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

// _mm_cmpneq_ss: expects the scalar-float compare intrinsic with predicate
// immediate 4.
__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

// _mm_cmpnlt_ss: expects the scalar-float compare intrinsic with predicate
// immediate 5.
__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

// _mm_cmpnle_ss: expects the scalar-float compare intrinsic with predicate
// immediate 6.
__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

// _mm_cmpord_ss: expects the scalar-float compare intrinsic with predicate
// immediate 7.
__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

// _mm_cmpgt_ss: expects the scalar-float compare intrinsic with predicate
// immediate 1.
__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmpgt_ss(__a, __b);
}

// _mm_cmpge_ss: expects the scalar-float compare intrinsic with predicate
// immediate 2.
__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmpge_ss(__a, __b);
}

// _mm_cmpngt_ss: expects the scalar-float compare intrinsic with predicate
// immediate 5.
__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpngt_ss(__a, __b);
}

// _mm_cmpnge_ss: expects the scalar-float compare intrinsic with predicate
// immediate 6.
__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnge_ss(__a, __b);
}

// _mm_cmpeq_ps: expects the packed-float compare intrinsic with predicate
// immediate 0.
__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ps(__a, __b);
}

// _mm_cmplt_ps: expects the packed-float compare intrinsic with predicate
// immediate 1.
__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ps(__a, __b);
}

// _mm_cmple_ps: expects the packed-float compare intrinsic with predicate
// immediate 2.
__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ps(__a, __b);
}

// _mm_cmpunord_ps: expects the packed-float compare intrinsic with predicate
// immediate 3.
__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ps(__a, __b);
}

// _mm_cmpneq_ps: expects the packed-float compare intrinsic with predicate
// immediate 4.
__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ps(__a, __b);
}

// _mm_cmpnlt_ps: expects the packed-float compare intrinsic with predicate
// immediate 5.
__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ps(__a, __b);
}

// _mm_cmpnle_ps: expects the packed-float compare intrinsic with predicate
// immediate 6.
__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ps(__a, __b);
}

// _mm_cmpord_ps: expects the packed-float compare intrinsic with predicate
// immediate 7.
__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ps(__a, __b);
}

// _mm_cmpgt_ps: expects the packed-float compare intrinsic with predicate
// immediate 1.
__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmpgt_ps(__a, __b);
}

// _mm_cmpge_ps: expects the packed-float compare intrinsic with predicate
// immediate 2.
__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmpge_ps(__a, __b);
}

// _mm_cmpngt_ps: expects the packed-float compare intrinsic with predicate
// immediate 5.
__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpngt_ps(__a, __b);
}

// _mm_cmpnge_ps: expects the packed-float compare intrinsic with predicate
// immediate 6.
__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ps
  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnge_ps(__a, __b);
}

// _mm_cmpeq_sd: expects the scalar-double compare intrinsic with predicate
// immediate 0.
__m128d test_mm_cmpeq_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpeq_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_sd(__a, __b);
}

// _mm_cmplt_sd: expects the scalar-double compare intrinsic with predicate
// immediate 1.
__m128d test_mm_cmplt_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmplt_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_sd(__a, __b);
}

// _mm_cmple_sd: expects the scalar-double compare intrinsic with predicate
// immediate 2.
__m128d test_mm_cmple_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmple_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_sd(__a, __b);
}

// _mm_cmpunord_sd: expects the scalar-double compare intrinsic with predicate
// immediate 3.
__m128d test_mm_cmpunord_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpunord_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_sd(__a, __b);
}

// _mm_cmpneq_sd: expects the scalar-double compare intrinsic with predicate
// immediate 4.
__m128d test_mm_cmpneq_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpneq_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_sd(__a, __b);
}

// _mm_cmpnlt_sd: expects the scalar-double compare intrinsic with predicate
// immediate 5.
__m128d test_mm_cmpnlt_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_sd(__a, __b);
}

// _mm_cmpnle_sd: expects the scalar-double compare intrinsic with predicate
// immediate 6.
__m128d test_mm_cmpnle_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnle_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_sd(__a, __b);
}

// _mm_cmpord_sd: expects the scalar-double compare intrinsic with predicate
// immediate 7.
__m128d test_mm_cmpord_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpord_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_sd(__a, __b);
}

// _mm_cmpgt_sd: expects the scalar-double compare intrinsic with predicate
// immediate 1.
__m128d test_mm_cmpgt_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpgt_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmpgt_sd(__a, __b);
}

// _mm_cmpge_sd: expects the scalar-double compare intrinsic with predicate
// immediate 2.
__m128d test_mm_cmpge_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpge_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmpge_sd(__a, __b);
}

// _mm_cmpngt_sd: expects the scalar-double compare intrinsic with predicate
// immediate 5.
__m128d test_mm_cmpngt_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpngt_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpngt_sd(__a, __b);
}

// _mm_cmpnge_sd: expects the scalar-double compare intrinsic with predicate
// immediate 6.
__m128d test_mm_cmpnge_sd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnge_sd
  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnge_sd(__a, __b);
}

// _mm_cmpeq_pd: expects the packed-double compare intrinsic with predicate
// immediate 0.
__m128d test_mm_cmpeq_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpeq_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
  return _mm_cmpeq_pd(__a, __b);
}

// _mm_cmplt_pd: expects the packed-double compare intrinsic with predicate
// immediate 1.
__m128d test_mm_cmplt_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmplt_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmplt_pd(__a, __b);
}

// _mm_cmple_pd: expects the packed-double compare intrinsic with predicate
// immediate 2.
__m128d test_mm_cmple_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmple_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmple_pd(__a, __b);
}

// _mm_cmpunord_pd: expects the packed-double compare intrinsic with predicate
// immediate 3.
__m128d test_mm_cmpunord_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpunord_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
  return _mm_cmpunord_pd(__a, __b);
}

// _mm_cmpneq_pd: expects the packed-double compare intrinsic with predicate
// immediate 4.
__m128d test_mm_cmpneq_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpneq_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
  return _mm_cmpneq_pd(__a, __b);
}

// _mm_cmpnlt_pd: expects the packed-double compare intrinsic with predicate
// immediate 5.
__m128d test_mm_cmpnlt_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpnlt_pd(__a, __b);
}

// _mm_cmpnle_pd: expects the packed-double compare intrinsic with predicate
// immediate 6.
__m128d test_mm_cmpnle_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnle_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnle_pd(__a, __b);
}

// _mm_cmpord_pd: expects the packed-double compare intrinsic with predicate
// immediate 7.
__m128d test_mm_cmpord_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpord_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
  return _mm_cmpord_pd(__a, __b);
}

// _mm_cmpgt_pd: expects the packed-double compare intrinsic with predicate
// immediate 1.
__m128d test_mm_cmpgt_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpgt_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
  return _mm_cmpgt_pd(__a, __b);
}

// _mm_cmpge_pd: expects the packed-double compare intrinsic with predicate
// immediate 2.
__m128d test_mm_cmpge_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpge_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
  return _mm_cmpge_pd(__a, __b);
}

// _mm_cmpngt_pd: expects the packed-double compare intrinsic with predicate
// immediate 5.
__m128d test_mm_cmpngt_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpngt_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
  return _mm_cmpngt_pd(__a, __b);
}

// _mm_cmpnge_pd: expects the packed-double compare intrinsic with predicate
// immediate 6.
__m128d test_mm_cmpnge_pd(__m128d __a, __m128d __b) {
  // CHECK-LABEL: @test_mm_cmpnge_pd
  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
  return _mm_cmpnge_pd(__a, __b);
}

// _mm_slli_si128 by 5 bytes: expects a <16 x i8> shufflevector selecting
// indices 11..26. The float<->integer vector reinterpretations are written as
// explicit casts so the test does not rely on lax vector conversions.
__m128 test_mm_slli_si128(__m128 a) {
  // CHECK-LABEL: @test_mm_slli_si128
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return (__m128)_mm_slli_si128((__m128i)a, 5);
}

// _mm_bslli_si128 by 5 bytes: expects a <16 x i8> shufflevector selecting
// indices 11..26. The float<->integer vector reinterpretations are written as
// explicit casts so the test does not rely on lax vector conversions.
__m128 test_mm_bslli_si128(__m128 a) {
  // CHECK-LABEL: @test_mm_bslli_si128
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  return (__m128)_mm_bslli_si128((__m128i)a, 5);
}

// _mm_srli_si128 by 5 bytes: expects a <16 x i8> shufflevector selecting
// indices 5..20. The float<->integer vector reinterpretations are written as
// explicit casts so the test does not rely on lax vector conversions.
__m128 test_mm_srli_si128(__m128 a) {
  // CHECK-LABEL: @test_mm_srli_si128
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return (__m128)_mm_srli_si128((__m128i)a, 5);
}

// _mm_bsrli_si128 by 5 bytes: expects a <16 x i8> shufflevector selecting
// indices 5..20. The float<->integer vector reinterpretations are written as
// explicit casts so the test does not rely on lax vector conversions.
__m128 test_mm_bsrli_si128(__m128 a) {
  // CHECK-LABEL: @test_mm_bsrli_si128
  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  return (__m128)_mm_bsrli_si128((__m128i)a, 5);
}

// _mm_undefined_ps: expects the function to return an undef <4 x float>.
__m128 test_mm_undefined_ps(void) {
  // CHECK-LABEL: @test_mm_undefined_ps
  // CHECK: ret <4 x float> undef
  return _mm_undefined_ps();
}

// _mm_undefined_pd: expects the function to return an undef <2 x double>.
__m128d test_mm_undefined_pd(void) {
  // CHECK-LABEL: @test_mm_undefined_pd
  // CHECK: ret <2 x double> undef
  return _mm_undefined_pd();
}

// _mm_undefined_si128: expects the function to return an undef <2 x i64>.
__m128i test_mm_undefined_si128(void) {
  // CHECK-LABEL: @test_mm_undefined_si128
  // CHECK: ret <2 x i64> undef
  return _mm_undefined_si128();
}

// _mm_add_si64: expects a use of the MMX padd.q intrinsic on two x86_mmx
// operands. The pattern directive previously lacked its colon and was
// therefore never evaluated by FileCheck.
__m64 test_mm_add_si64(__m64 __a, __m64 __b) {
  // CHECK-LABEL: @test_mm_add_si64
  // CHECK: @llvm.x86.mmx.padd.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
  return _mm_add_si64(__a, __b);
}

// _mm_sub_si64: expects a use of the MMX psub.q intrinsic on two x86_mmx
// operands. The pattern directive previously lacked its colon and was
// therefore never evaluated by FileCheck.
__m64 test_mm_sub_si64(__m64 __a, __m64 __b) {
  // CHECK-LABEL: @test_mm_sub_si64
  // CHECK: @llvm.x86.mmx.psub.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
  return _mm_sub_si64(__a, __b);
}

// _mm_mul_su32: expects a use of the MMX pmulu.dq intrinsic on two x86_mmx
// operands. The pattern directive previously lacked its colon and was
// therefore never evaluated by FileCheck.
__m64 test_mm_mul_su32(__m64 __a, __m64 __b) {
  // CHECK-LABEL: @test_mm_mul_su32
  // CHECK: @llvm.x86.mmx.pmulu.dq(x86_mmx %{{.*}}, x86_mmx %{{.*}})
  return _mm_mul_su32(__a, __b);
}

// _mm_pause: expects a use of the sse2.pause intrinsic. The pattern directive
// previously lacked its colon and was never evaluated by FileCheck; the call
// is now a plain statement because a void function may not return an
// expression.
void test_mm_pause(void) {
  // CHECK-LABEL: @test_mm_pause
  // CHECK: @llvm.x86.sse2.pause()
  _mm_pause();
}