sse-builtins.c revision 0e2c34f92f00628d48968dfea096d36381f494cb
1// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s
2
3#include <xmmintrin.h>
4#include <emmintrin.h>
5#include <smmintrin.h>
6
// Checks that _mm_rsqrt_ss(x) is emitted as a call to
// @llvm.x86.sse.rsqrt.ss, followed by extractelement instructions reading
// lanes 0, 1, 2 and 3 of a <4 x float> value (in that order).
7__m128 test_rsqrt_ss(__m128 x) {
8  // CHECK: define {{.*}} @test_rsqrt_ss
9  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss
10  // CHECK: extractelement <4 x float> {{.*}}, i32 0
11  // CHECK: extractelement <4 x float> {{.*}}, i32 1
12  // CHECK: extractelement <4 x float> {{.*}}, i32 2
13  // CHECK: extractelement <4 x float> {{.*}}, i32 3
14  return _mm_rsqrt_ss(x);
15}
16
// Checks that _mm_rcp_ss(x) is emitted as a call to @llvm.x86.sse.rcp.ss,
// followed by extractelement instructions reading lanes 0, 1, 2 and 3 of a
// <4 x float> value (in that order).
17__m128 test_rcp_ss(__m128 x) {
18  // CHECK: define {{.*}} @test_rcp_ss
19  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss
20  // CHECK: extractelement <4 x float> {{.*}}, i32 0
21  // CHECK: extractelement <4 x float> {{.*}}, i32 1
22  // CHECK: extractelement <4 x float> {{.*}}, i32 2
23  // CHECK: extractelement <4 x float> {{.*}}, i32 3
24  return _mm_rcp_ss(x);
25}
26
// Checks that _mm_sqrt_ss(x) is emitted as a call to @llvm.x86.sse.sqrt.ss,
// followed by extractelement instructions reading lanes 0, 1, 2 and 3 of a
// <4 x float> value (in that order).
27__m128 test_sqrt_ss(__m128 x) {
28  // CHECK: define {{.*}} @test_sqrt_ss
29  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
30  // CHECK: extractelement <4 x float> {{.*}}, i32 0
31  // CHECK: extractelement <4 x float> {{.*}}, i32 1
32  // CHECK: extractelement <4 x float> {{.*}}, i32 2
33  // CHECK: extractelement <4 x float> {{.*}}, i32 3
34  return _mm_sqrt_ss(x);
35}
36
// Checks the IR emitted for _mm_loadl_pi(x, y): a <2 x float> load with
// "align 1" and nothing after it on the line, then a shufflevector whose
// <4 x i32> mask begins with 0, 1, then a shufflevector with mask
// <4, 5, 2, 3>.
37__m128 test_loadl_pi(__m128 x, void* y) {
38  // CHECK: define {{.*}} @test_loadl_pi
39  // CHECK: load <2 x float>* {{.*}}, align 1{{$}}
40  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
41  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
42  return _mm_loadl_pi(x,y);
43}
44
// Checks the IR emitted for _mm_loadh_pi(x, y): a <2 x float> load with
// "align 1" and nothing after it on the line, then a shufflevector whose
// <4 x i32> mask begins with 0, 1, then a shufflevector with mask
// <0, 1, 4, 5>.
45__m128 test_loadh_pi(__m128 x, void* y) {
46  // CHECK: define {{.*}} @test_loadh_pi
47  // CHECK: load <2 x float>* {{.*}}, align 1{{$}}
48  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
49  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
50  return _mm_loadh_pi(x,y);
51}
52
// Checks that _mm_load_ss(y) produces a float load with "align 1" and
// nothing after the alignment on that IR line.
53__m128 test_load_ss(void* y) {
54  // CHECK: define {{.*}} @test_load_ss
55  // CHECK: load float* {{.*}}, align 1{{$}}
56  return _mm_load_ss(y);
57}
58
// Checks that _mm_load1_ps(y) produces a float load with "align 1" and
// nothing after the alignment on that IR line.
59__m128 test_load1_ps(void* y) {
60  // CHECK: define {{.*}} @test_load1_ps
61  // CHECK: load float* {{.*}}, align 1{{$}}
62  return _mm_load1_ps(y);
63}
64
// Checks that _mm_store_ss(y, x) produces a store through a float pointer
// with "align 1" and nothing after the alignment on that IR line.
65void test_store_ss(__m128 x, void* y) {
66  // CHECK-LABEL: define void @test_store_ss
67  // CHECK: store {{.*}} float* {{.*}}, align 1{{$}}
68  _mm_store_ss(y, x);
69}
70
// Checks that _mm_load1_pd(y) produces a double load with "align 1" and
// nothing after the alignment on that IR line.
// The parameter x is not used by the body.
71__m128d test_load1_pd(__m128 x, void* y) {
72  // CHECK: define {{.*}} @test_load1_pd
73  // CHECK: load double* {{.*}}, align 1{{$}}
74  return _mm_load1_pd(y);
75}
76
// Checks that _mm_loadr_pd(y) produces a <2 x double> load with "align 16"
// and nothing after the alignment on that IR line.
// The parameter x is not used by the body.
77__m128d test_loadr_pd(__m128 x, void* y) {
78  // CHECK: define {{.*}} @test_loadr_pd
79  // CHECK: load <2 x double>* {{.*}}, align 16{{$}}
80  return _mm_loadr_pd(y);
81}
82
// Checks that _mm_load_sd(y) produces a double load with "align 1" and
// nothing after the alignment on that IR line.
83__m128d test_load_sd(void* y) {
84  // CHECK: define {{.*}} @test_load_sd
85  // CHECK: load double* {{.*}}, align 1{{$}}
86  return _mm_load_sd(y);
87}
88
// Checks that _mm_loadh_pd(x, y) produces a double load with "align 1" and
// nothing after the alignment on that IR line.
89__m128d test_loadh_pd(__m128d x, void* y) {
90  // CHECK: define {{.*}} @test_loadh_pd
91  // CHECK: load double* {{.*}}, align 1{{$}}
92  return _mm_loadh_pd(x, y);
93}
94
// Checks that _mm_loadl_pd(x, y) produces a double load with "align 1" and
// nothing after the alignment on that IR line.
95__m128d test_loadl_pd(__m128d x, void* y) {
96  // CHECK: define {{.*}} @test_loadl_pd
97  // CHECK: load double* {{.*}}, align 1{{$}}
98  return _mm_loadl_pd(x, y);
99}
100
// Checks that _mm_store_sd(y, x) produces a store through a double pointer
// with "align 1" and nothing after the alignment on that IR line.
101void test_store_sd(__m128d x, void* y) {
102  // CHECK-LABEL: define void @test_store_sd
103  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
104  _mm_store_sd(y, x);
105}
106
// Checks that _mm_store1_pd(y, x) produces two stores through double
// pointers, each with "align 1" and nothing after the alignment on the line.
107void test_store1_pd(__m128d x, void* y) {
108  // CHECK-LABEL: define void @test_store1_pd
109  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
110  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
111  _mm_store1_pd(y, x);
112}
113
// Checks that _mm_storer_pd(y, x) produces a store through a <2 x double>
// pointer with "align 16" and nothing after the alignment on that IR line.
114void test_storer_pd(__m128d x, void* y) {
115  // CHECK-LABEL: define void @test_storer_pd
116  // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
117  _mm_storer_pd(y, x);
118}
119
// Checks that _mm_storeh_pd(y, x) produces a store through a double pointer
// with "align 1" and nothing after the alignment on that IR line.
120void test_storeh_pd(__m128d x, void* y) {
121  // CHECK-LABEL: define void @test_storeh_pd
122  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
123  _mm_storeh_pd(y, x);
124}
125
// Checks that _mm_storel_pd(y, x) produces a store through a double pointer
// with "align 1" and nothing after the alignment on that IR line.
126void test_storel_pd(__m128d x, void* y) {
127  // CHECK-LABEL: define void @test_storel_pd
128  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
129  _mm_storel_pd(y, x);
130}
131
// Checks that _mm_loadl_epi64(y) produces an i64 load with "align 1" and
// nothing after the alignment on that IR line.
132__m128i test_loadl_epi64(void* y) {
133  // CHECK: define {{.*}} @test_loadl_epi64
134  // CHECK: load i64* {{.*}}, align 1{{$}}
135  return _mm_loadl_epi64(y);
136}
137
// Checks that _mm_minpos_epu16(x) references @llvm.x86.sse41.phminposuw in
// the emitted IR.
138__m128i test_mm_minpos_epu16(__m128i x) {
139  // CHECK: define {{.*}} @test_mm_minpos_epu16
140  // CHECK: @llvm.x86.sse41.phminposuw
141  return _mm_minpos_epu16(x);
142}
143
// Checks that _mm_mpsadbw_epu8(x, y, 1) references @llvm.x86.sse41.mpsadbw
// in the emitted IR. The immediate operand is not matched.
144__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
145  // CHECK: define {{.*}} @test_mm_mpsadbw_epu8
146  // CHECK: @llvm.x86.sse41.mpsadbw
147  return _mm_mpsadbw_epu8(x, y, 1);
148}
149
// Checks that _mm_dp_ps(x, y, 2) references @llvm.x86.sse41.dpps in the
// emitted IR. The immediate operand is not matched.
150__m128 test_mm_dp_ps(__m128 x, __m128 y) {
151  // CHECK: define {{.*}} @test_mm_dp_ps
152  // CHECK: @llvm.x86.sse41.dpps
153  return _mm_dp_ps(x, y, 2);
154}
155
// Checks that _mm_dp_pd(x, y, 2) references @llvm.x86.sse41.dppd in the
// emitted IR. The immediate operand is not matched.
156__m128d test_mm_dp_pd(__m128d x, __m128d y) {
157  // CHECK: define {{.*}} @test_mm_dp_pd
158  // CHECK: @llvm.x86.sse41.dppd
159  return _mm_dp_pd(x, y, 2);
160}
161
// Checks that _mm_round_ps(x, 2) references @llvm.x86.sse41.round.ps in the
// emitted IR. The rounding-mode immediate is not matched.
162__m128 test_mm_round_ps(__m128 x) {
163  // CHECK: define {{.*}} @test_mm_round_ps
164  // CHECK: @llvm.x86.sse41.round.ps
165  return _mm_round_ps(x, 2);
166}
167
// Checks that _mm_round_ss(x, y, 2) references @llvm.x86.sse41.round.ss in
// the emitted IR. The rounding-mode immediate is not matched.
168__m128 test_mm_round_ss(__m128 x, __m128 y) {
169  // CHECK: define {{.*}} @test_mm_round_ss
170  // CHECK: @llvm.x86.sse41.round.ss
171  return _mm_round_ss(x, y, 2);
172}
173
// Checks that _mm_round_pd(x, 2) references @llvm.x86.sse41.round.pd in the
// emitted IR. The rounding-mode immediate is not matched.
174__m128d test_mm_round_pd(__m128d x) {
175  // CHECK: define {{.*}} @test_mm_round_pd
176  // CHECK: @llvm.x86.sse41.round.pd
177  return _mm_round_pd(x, 2);
178}
179
// Checks that _mm_round_sd(x, y, 2) references @llvm.x86.sse41.round.sd in
// the emitted IR. The rounding-mode immediate is not matched.
180__m128d test_mm_round_sd(__m128d x, __m128d y) {
181  // CHECK: define {{.*}} @test_mm_round_sd
182  // CHECK: @llvm.x86.sse41.round.sd
183  return _mm_round_sd(x, y, 2);
184}
185
// Checks that _mm_storel_epi64(y, x) produces a store through an i64 pointer
// with "align 1" and nothing after the alignment on that IR line.
186void test_storel_epi64(__m128i x, void* y) {
187  // CHECK-LABEL: define void @test_storel_epi64
188  // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
189  _mm_storel_epi64(y, x);
190}
191
// Checks that _mm_stream_si32(y, x) produces a store through an i32 pointer
// with "align 1" that carries !nontemporal metadata.
192void test_stream_si32(int x, void *y) {
193  // CHECK-LABEL: define void @test_stream_si32
194  // CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal
195  _mm_stream_si32(y, x);
196}
197
// Checks that _mm_stream_si64(y, x) produces a store through an i64 pointer
// with "align 1" that carries !nontemporal metadata.
198void test_stream_si64(long long x, void *y) {
199  // CHECK-LABEL: define void @test_stream_si64
200  // CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal
201  _mm_stream_si64(y, x);
202}
203
// Checks that _mm_stream_si128(y, x) produces a store through a <2 x i64>
// pointer with "align 16" that carries !nontemporal metadata.
204void test_stream_si128(__m128i x, void *y) {
205  // CHECK-LABEL: define void @test_stream_si128
206  // CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal
207  _mm_stream_si128(y, x);
208}
209
// Checks _mm_extract_epi16 with index 8, which is outside the 0-7 lane range
// of an <8 x i16>. The expected IR masks an i32 value with 7 and uses the
// masked value as the extractelement index. The result is discarded.
210void test_extract_epi16(__m128i __a) {
211  // CHECK-LABEL: define void @test_extract_epi16
212  // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
213  // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
214  _mm_extract_epi16(__a, 8);
215}
216
// Checks _mm_extract_ps with index 4, which is outside the 0-3 lane range of
// a <4 x float>. The expected IR extracts lane 0.
// NOTE(review): the argument is declared __m128i while the expected IR
// operates on <4 x float>; presumably this relies on an implicit vector
// conversion -- confirm that is intended.
217int test_extract_ps(__m128i __a) {
218  // CHECK-LABEL: @test_extract_ps
219  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
220  return _mm_extract_ps(__a, 4);
221}
222
// Checks _mm_extract_epi8 with index 16, which is outside the 0-15 lane
// range of a <16 x i8>. The expected IR extracts lane 0.
223int test_extract_epi8(__m128i __a) {
224  // CHECK-LABEL: @test_extract_epi8
225  // CHECK: extractelement <16 x i8> %{{.*}}, i32 0
226  return _mm_extract_epi8(__a, 16);
227}
228
// Checks _mm_extract_epi32 with index 4, which is outside the 0-3 lane range
// of a <4 x i32>. The expected IR extracts lane 0.
229int test_extract_epi32(__m128i __a) {
230  // CHECK-LABEL: @test_extract_epi32
231  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
232  return _mm_extract_epi32(__a, 4);
233}
234
// Checks _mm_insert_epi32 with index 4, which is outside the 0-3 lane range
// of a <4 x i32>. The expected IR inserts into lane 0. The result is
// discarded.
235void test_insert_epi32(__m128i __a, int b) {
236  // CHECK-LABEL: @test_insert_epi32
237  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
238   _mm_insert_epi32(__a, b, 4);
239}
240
// Checks that _mm_blend_pd(V1, V2, 1) is emitted as a shufflevector of two
// <2 x double> operands with mask <2, 1>: lane 0 comes from the second
// operand, lane 1 from the first (immediate 1 has only bit 0 set).
241__m128d test_blend_pd(__m128d V1, __m128d V2) {
242  // CHECK-LABEL: @test_blend_pd
243  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 2, i32 1>
244  return _mm_blend_pd(V1, V2, 1);
245}
246
// Checks that _mm_blend_ps(V1, V2, 5) is emitted as a shufflevector of two
// <4 x float> operands with mask <4, 1, 6, 3>: lanes 0 and 2 come from the
// second operand (immediate 5 has bits 0 and 2 set), lanes 1 and 3 from the
// first.
247__m128 test_blend_ps(__m128 V1, __m128 V2) {
248  // CHECK-LABEL: @test_blend_ps
249  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
250  return _mm_blend_ps(V1, V2, 5);
251}
252
// Checks that _mm_blend_epi16(V1, V2, 42) is emitted as a shufflevector of
// two <8 x i16> operands with mask <0, 9, 2, 11, 4, 13, 6, 7>: lanes 1, 3
// and 5 come from the second operand (immediate 42 has bits 1, 3 and 5 set),
// the remaining lanes from the first.
253__m128i test_blend_epi16(__m128i V1, __m128i V2) {
254  // CHECK-LABEL: @test_blend_epi16
255  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
256  return _mm_blend_epi16(V1, V2, 42);
257}
258
// Checks that _mm_cmpeq_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 0.
259__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
260  // CHECK-LABEL: @test_mm_cmpeq_ss
261  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
262  return _mm_cmpeq_ss(__a, __b);
263}
264
// Checks that _mm_cmplt_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 1.
265__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
266  // CHECK-LABEL: @test_mm_cmplt_ss
267  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
268  return _mm_cmplt_ss(__a, __b);
269}
270
// Checks that _mm_cmple_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 2.
271__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
272  // CHECK-LABEL: @test_mm_cmple_ss
273  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
274  return _mm_cmple_ss(__a, __b);
275}
276
// Checks that _mm_cmpunord_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 3.
277__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
278  // CHECK-LABEL: @test_mm_cmpunord_ss
279  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
280  return _mm_cmpunord_ss(__a, __b);
281}
282
// Checks that _mm_cmpneq_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 4.
283__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
284  // CHECK-LABEL: @test_mm_cmpneq_ss
285  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
286  return _mm_cmpneq_ss(__a, __b);
287}
288
// Checks that _mm_cmpnlt_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 5.
289__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
290  // CHECK-LABEL: @test_mm_cmpnlt_ss
291  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
292  return _mm_cmpnlt_ss(__a, __b);
293}
294
// Checks that _mm_cmpnle_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 6.
295__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
296  // CHECK-LABEL: @test_mm_cmpnle_ss
297  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
298  return _mm_cmpnle_ss(__a, __b);
299}
300
// Checks that _mm_cmpord_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 7.
301__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
302  // CHECK-LABEL: @test_mm_cmpord_ss
303  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
304  return _mm_cmpord_ss(__a, __b);
305}
306
// Checks that _mm_cmpgt_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 1.
// The expected immediate equals the one test_mm_cmplt_ss expects; presumably
// the header builds this from the less-than form with swapped operands --
// TODO confirm in the intrinsic header.
307__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
308  // CHECK-LABEL: @test_mm_cmpgt_ss
309  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
310  return _mm_cmpgt_ss(__a, __b);
311}
312
// Checks that _mm_cmpge_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 2.
// The expected immediate equals the one test_mm_cmple_ss expects; presumably
// the header builds this from the less-or-equal form with swapped operands --
// TODO confirm in the intrinsic header.
313__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
314  // CHECK-LABEL: @test_mm_cmpge_ss
315  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
316  return _mm_cmpge_ss(__a, __b);
317}
318
// Checks that _mm_cmpngt_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 5.
// The expected immediate equals the one test_mm_cmpnlt_ss expects;
// presumably the header builds this from the not-less-than form with swapped
// operands -- TODO confirm in the intrinsic header.
319__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
320  // CHECK-LABEL: @test_mm_cmpngt_ss
321  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
322  return _mm_cmpngt_ss(__a, __b);
323}
324
// Checks that _mm_cmpnge_ss is emitted as @llvm.x86.sse.cmp.ss on two
// <4 x float> operands with predicate immediate i8 6.
// The expected immediate equals the one test_mm_cmpnle_ss expects;
// presumably the header builds this from the not-less-or-equal form with
// swapped operands -- TODO confirm in the intrinsic header.
325__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
326  // CHECK-LABEL: @test_mm_cmpnge_ss
327  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
328  return _mm_cmpnge_ss(__a, __b);
329}
330
// Checks that _mm_cmpeq_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 0.
331__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
332  // CHECK-LABEL: @test_mm_cmpeq_ps
333  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
334  return _mm_cmpeq_ps(__a, __b);
335}
336
// Checks that _mm_cmplt_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 1.
337__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
338  // CHECK-LABEL: @test_mm_cmplt_ps
339  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
340  return _mm_cmplt_ps(__a, __b);
341}
342
// Checks that _mm_cmple_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 2.
343__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
344  // CHECK-LABEL: @test_mm_cmple_ps
345  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
346  return _mm_cmple_ps(__a, __b);
347}
348
// Checks that _mm_cmpunord_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 3.
349__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
350  // CHECK-LABEL: @test_mm_cmpunord_ps
351  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
352  return _mm_cmpunord_ps(__a, __b);
353}
354
// Checks that _mm_cmpneq_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 4.
355__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
356  // CHECK-LABEL: @test_mm_cmpneq_ps
357  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
358  return _mm_cmpneq_ps(__a, __b);
359}
360
// Checks that _mm_cmpnlt_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 5.
361__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
362  // CHECK-LABEL: @test_mm_cmpnlt_ps
363  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
364  return _mm_cmpnlt_ps(__a, __b);
365}
366
// Checks that _mm_cmpnle_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 6.
367__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
368  // CHECK-LABEL: @test_mm_cmpnle_ps
369  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
370  return _mm_cmpnle_ps(__a, __b);
371}
372
// Checks that _mm_cmpord_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 7.
373__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
374  // CHECK-LABEL: @test_mm_cmpord_ps
375  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
376  return _mm_cmpord_ps(__a, __b);
377}
378
// Checks that _mm_cmpgt_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 1.
// The expected immediate equals the one test_mm_cmplt_ps expects; presumably
// the header builds this from the less-than form with swapped operands --
// TODO confirm in the intrinsic header.
379__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
380  // CHECK-LABEL: @test_mm_cmpgt_ps
381  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
382  return _mm_cmpgt_ps(__a, __b);
383}
384
// Checks that _mm_cmpge_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 2.
// The expected immediate equals the one test_mm_cmple_ps expects; presumably
// the header builds this from the less-or-equal form with swapped operands --
// TODO confirm in the intrinsic header.
385__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
386  // CHECK-LABEL: @test_mm_cmpge_ps
387  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
388  return _mm_cmpge_ps(__a, __b);
389}
390
// Checks that _mm_cmpngt_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 5.
// The expected immediate equals the one test_mm_cmpnlt_ps expects;
// presumably the header builds this from the not-less-than form with swapped
// operands -- TODO confirm in the intrinsic header.
391__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
392  // CHECK-LABEL: @test_mm_cmpngt_ps
393  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
394  return _mm_cmpngt_ps(__a, __b);
395}
396
// Checks that _mm_cmpnge_ps is emitted as @llvm.x86.sse.cmp.ps on two
// <4 x float> operands with predicate immediate i8 6.
// The expected immediate equals the one test_mm_cmpnle_ps expects;
// presumably the header builds this from the not-less-or-equal form with
// swapped operands -- TODO confirm in the intrinsic header.
397__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
398  // CHECK-LABEL: @test_mm_cmpnge_ps
399  // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
400  return _mm_cmpnge_ps(__a, __b);
401}
402
// Checks that _mm_cmpeq_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 0.
403__m128d test_mm_cmpeq_sd(__m128d __a, __m128d __b) {
404  // CHECK-LABEL: @test_mm_cmpeq_sd
405  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
406  return _mm_cmpeq_sd(__a, __b);
407}
408
// Checks that _mm_cmplt_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 1.
409__m128d test_mm_cmplt_sd(__m128d __a, __m128d __b) {
410  // CHECK-LABEL: @test_mm_cmplt_sd
411  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
412  return _mm_cmplt_sd(__a, __b);
413}
414
// Checks that _mm_cmple_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 2.
415__m128d test_mm_cmple_sd(__m128d __a, __m128d __b) {
416  // CHECK-LABEL: @test_mm_cmple_sd
417  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
418  return _mm_cmple_sd(__a, __b);
419}
420
// Checks that _mm_cmpunord_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 3.
421__m128d test_mm_cmpunord_sd(__m128d __a, __m128d __b) {
422  // CHECK-LABEL: @test_mm_cmpunord_sd
423  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
424  return _mm_cmpunord_sd(__a, __b);
425}
426
// Checks that _mm_cmpneq_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 4.
427__m128d test_mm_cmpneq_sd(__m128d __a, __m128d __b) {
428  // CHECK-LABEL: @test_mm_cmpneq_sd
429  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
430  return _mm_cmpneq_sd(__a, __b);
431}
432
// Checks that _mm_cmpnlt_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 5.
433__m128d test_mm_cmpnlt_sd(__m128d __a, __m128d __b) {
434  // CHECK-LABEL: @test_mm_cmpnlt_sd
435  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
436  return _mm_cmpnlt_sd(__a, __b);
437}
438
// Checks that _mm_cmpnle_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 6.
439__m128d test_mm_cmpnle_sd(__m128d __a, __m128d __b) {
440  // CHECK-LABEL: @test_mm_cmpnle_sd
441  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
442  return _mm_cmpnle_sd(__a, __b);
443}
444
// Checks that _mm_cmpord_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 7.
445__m128d test_mm_cmpord_sd(__m128d __a, __m128d __b) {
446  // CHECK-LABEL: @test_mm_cmpord_sd
447  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
448  return _mm_cmpord_sd(__a, __b);
449}
450
// Checks that _mm_cmpgt_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 1.
// The expected immediate equals the one test_mm_cmplt_sd expects; presumably
// the header builds this from the less-than form with swapped operands --
// TODO confirm in the intrinsic header.
451__m128d test_mm_cmpgt_sd(__m128d __a, __m128d __b) {
452  // CHECK-LABEL: @test_mm_cmpgt_sd
453  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
454  return _mm_cmpgt_sd(__a, __b);
455}
456
// Checks that _mm_cmpge_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 2.
// The expected immediate equals the one test_mm_cmple_sd expects; presumably
// the header builds this from the less-or-equal form with swapped operands --
// TODO confirm in the intrinsic header.
457__m128d test_mm_cmpge_sd(__m128d __a, __m128d __b) {
458  // CHECK-LABEL: @test_mm_cmpge_sd
459  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
460  return _mm_cmpge_sd(__a, __b);
461}
462
// Checks that _mm_cmpngt_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 5.
// The expected immediate equals the one test_mm_cmpnlt_sd expects;
// presumably the header builds this from the not-less-than form with swapped
// operands -- TODO confirm in the intrinsic header.
463__m128d test_mm_cmpngt_sd(__m128d __a, __m128d __b) {
464  // CHECK-LABEL: @test_mm_cmpngt_sd
465  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
466  return _mm_cmpngt_sd(__a, __b);
467}
468
// Checks that _mm_cmpnge_sd is emitted as @llvm.x86.sse2.cmp.sd on two
// <2 x double> operands with predicate immediate i8 6.
// The expected immediate equals the one test_mm_cmpnle_sd expects;
// presumably the header builds this from the not-less-or-equal form with
// swapped operands -- TODO confirm in the intrinsic header.
469__m128d test_mm_cmpnge_sd(__m128d __a, __m128d __b) {
470  // CHECK-LABEL: @test_mm_cmpnge_sd
471  // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
472  return _mm_cmpnge_sd(__a, __b);
473}
474
// Checks that _mm_cmpeq_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 0.
475__m128d test_mm_cmpeq_pd(__m128d __a, __m128d __b) {
476  // CHECK-LABEL: @test_mm_cmpeq_pd
477  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
478  return _mm_cmpeq_pd(__a, __b);
479}
480
// Checks that _mm_cmplt_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 1.
481__m128d test_mm_cmplt_pd(__m128d __a, __m128d __b) {
482  // CHECK-LABEL: @test_mm_cmplt_pd
483  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
484  return _mm_cmplt_pd(__a, __b);
485}
486
// Checks that _mm_cmple_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 2.
487__m128d test_mm_cmple_pd(__m128d __a, __m128d __b) {
488  // CHECK-LABEL: @test_mm_cmple_pd
489  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
490  return _mm_cmple_pd(__a, __b);
491}
492
// Checks that _mm_cmpunord_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 3.
493__m128d test_mm_cmpunord_pd(__m128d __a, __m128d __b) {
494  // CHECK-LABEL: @test_mm_cmpunord_pd
495  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
496  return _mm_cmpunord_pd(__a, __b);
497}
498
// Checks that _mm_cmpneq_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 4.
499__m128d test_mm_cmpneq_pd(__m128d __a, __m128d __b) {
500  // CHECK-LABEL: @test_mm_cmpneq_pd
501  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
502  return _mm_cmpneq_pd(__a, __b);
503}
504
// Checks that _mm_cmpnlt_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 5.
505__m128d test_mm_cmpnlt_pd(__m128d __a, __m128d __b) {
506  // CHECK-LABEL: @test_mm_cmpnlt_pd
507  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
508  return _mm_cmpnlt_pd(__a, __b);
509}
510
// Checks that _mm_cmpnle_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 6.
511__m128d test_mm_cmpnle_pd(__m128d __a, __m128d __b) {
512  // CHECK-LABEL: @test_mm_cmpnle_pd
513  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
514  return _mm_cmpnle_pd(__a, __b);
515}
516
// Checks that _mm_cmpord_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 7.
517__m128d test_mm_cmpord_pd(__m128d __a, __m128d __b) {
518  // CHECK-LABEL: @test_mm_cmpord_pd
519  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
520  return _mm_cmpord_pd(__a, __b);
521}
522
// Checks that _mm_cmpgt_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 1.
// The expected immediate equals the one test_mm_cmplt_pd expects; presumably
// the header builds this from the less-than form with swapped operands --
// TODO confirm in the intrinsic header.
523__m128d test_mm_cmpgt_pd(__m128d __a, __m128d __b) {
524  // CHECK-LABEL: @test_mm_cmpgt_pd
525  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
526  return _mm_cmpgt_pd(__a, __b);
527}
528
// Checks that _mm_cmpge_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 2.
// The expected immediate equals the one test_mm_cmple_pd expects; presumably
// the header builds this from the less-or-equal form with swapped operands --
// TODO confirm in the intrinsic header.
529__m128d test_mm_cmpge_pd(__m128d __a, __m128d __b) {
530  // CHECK-LABEL: @test_mm_cmpge_pd
531  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
532  return _mm_cmpge_pd(__a, __b);
533}
534
// Checks that _mm_cmpngt_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 5.
// The expected immediate equals the one test_mm_cmpnlt_pd expects;
// presumably the header builds this from the not-less-than form with swapped
// operands -- TODO confirm in the intrinsic header.
535__m128d test_mm_cmpngt_pd(__m128d __a, __m128d __b) {
536  // CHECK-LABEL: @test_mm_cmpngt_pd
537  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
538  return _mm_cmpngt_pd(__a, __b);
539}
540
// Checks that _mm_cmpnge_pd is emitted as @llvm.x86.sse2.cmp.pd on two
// <2 x double> operands with predicate immediate i8 6.
// The expected immediate equals the one test_mm_cmpnle_pd expects;
// presumably the header builds this from the not-less-or-equal form with
// swapped operands -- TODO confirm in the intrinsic header.
541__m128d test_mm_cmpnge_pd(__m128d __a, __m128d __b) {
542  // CHECK-LABEL: @test_mm_cmpnge_pd
543  // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
544  return _mm_cmpnge_pd(__a, __b);
545}
546
// Checks that _mm_slli_si128(a, 5) is emitted as a shufflevector of two
// <16 x i8> operands with the consecutive mask 11..26.
// NOTE(review): the parameter and return type are declared __m128 while the
// expected IR operates on <16 x i8>; presumably this relies on implicit
// vector conversions -- confirm that is intended.
547__m128 test_mm_slli_si128(__m128 a) {
548  // CHECK-LABEL: @test_mm_slli_si128
549  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
550  return _mm_slli_si128(a, 5);
551}
552
// Checks that _mm_bslli_si128(a, 5) is emitted as a shufflevector of two
// <16 x i8> operands with the consecutive mask 11..26, the same mask that
// test_mm_slli_si128 expects.
// NOTE(review): the parameter and return type are declared __m128 while the
// expected IR operates on <16 x i8>; presumably this relies on implicit
// vector conversions -- confirm that is intended.
553__m128 test_mm_bslli_si128(__m128 a) {
554  // CHECK-LABEL: @test_mm_bslli_si128
555  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
556  return _mm_bslli_si128(a, 5);
557}
558
// Checks that _mm_srli_si128(a, 5) is emitted as a shufflevector of two
// <16 x i8> operands with the consecutive mask 5..20.
// NOTE(review): the parameter and return type are declared __m128 while the
// expected IR operates on <16 x i8>; presumably this relies on implicit
// vector conversions -- confirm that is intended.
559__m128 test_mm_srli_si128(__m128 a) {
560  // CHECK-LABEL: @test_mm_srli_si128
561  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
562  return _mm_srli_si128(a, 5);
563}
564
// Checks that _mm_bsrli_si128(a, 5) is emitted as a shufflevector of two
// <16 x i8> operands with the consecutive mask 5..20, the same mask that
// test_mm_srli_si128 expects.
// NOTE(review): the parameter and return type are declared __m128 while the
// expected IR operates on <16 x i8>; presumably this relies on implicit
// vector conversions -- confirm that is intended.
565__m128 test_mm_bsrli_si128(__m128 a) {
566  // CHECK-LABEL: @test_mm_bsrli_si128
567  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
568  return _mm_bsrli_si128(a, 5);
569}
570
// Checks that _mm_alignr_epi8(a, b, 2) is emitted as a shufflevector of two
// <16 x i8> operands with the consecutive mask 2..17.
// The label directive anchors the shufflevector pattern to this function's
// IR, in the same way the neighbouring tests are anchored; without it the
// pattern could be satisfied by a shufflevector belonging to another
// function.
571__m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: @test_mm_alignr_epi8
572  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
573  return _mm_alignr_epi8(a, b, 2);
574}
575
// Checks that _mm_alignr_epi8(a, b, 17) is emitted as a shufflevector whose
// second <16 x i8> operand is zeroinitializer, with the consecutive mask
// 1..16.
// The label directive anchors the shufflevector pattern to this function's
// IR, in the same way the neighbouring tests are anchored; without it the
// pattern could be satisfied by a shufflevector belonging to another
// function.
576__m128i test2_mm_alignr_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: @test2_mm_alignr_epi8
577  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
578  return _mm_alignr_epi8(a, b, 17);
579}
580