1// REQUIRES: x86-registered-target
2// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +ssse3 -S -o - | FileCheck %s
3
// FIXME: Disable inclusion of mm_malloc.h; our current implementation is broken
// on win32 since we don't generally know how to find errno.h.
6#define __MM_MALLOC_H
7
8#include <tmmintrin.h>
9
10__m64 test1(__m64 a, __m64 b) {
11  // CHECK: phaddw
12  return _mm_hadd_pi16(a, b);
13}
14
15__m64 test2(__m64 a, __m64 b) {
16  // CHECK: phaddd
17  return _mm_hadd_pi32(a, b);
18}
19
20__m64 test3(__m64 a, __m64 b) {
21  // CHECK: phaddsw
22  return _mm_hadds_pi16(a, b);
23}
24
25__m64 test4(__m64 a, __m64 b) {
26  // CHECK: phsubw
27  return _mm_hsub_pi16(a, b);
28}
29
30__m64 test5(__m64 a, __m64 b) {
31  // CHECK: phsubd
32  return _mm_hsub_pi32(a, b);
33}
34
35__m64 test6(__m64 a, __m64 b) {
36  // CHECK: phsubsw
37  return _mm_hsubs_pi16(a, b);
38}
39
40__m64 test7(__m64 a, __m64 b) {
41  // CHECK: pmaddubsw
42  return _mm_maddubs_pi16(a, b);
43}
44
45__m64 test8(__m64 a, __m64 b) {
46  // CHECK: pmulhrsw
47  return _mm_mulhrs_pi16(a, b);
48}
49
50__m64 test9(__m64 a, __m64 b) {
51  // CHECK: pshufb
52  return _mm_shuffle_pi8(a, b);
53}
54
55__m64 test10(__m64 a, __m64 b) {
56  // CHECK: psignb
57  return _mm_sign_pi8(a, b);
58}
59
60__m64 test11(__m64 a, __m64 b) {
61  // CHECK: psignw
62  return _mm_sign_pi16(a, b);
63}
64
65__m64 test12(__m64 a, __m64 b) {
66  // CHECK: psignd
67  return _mm_sign_pi32(a, b);
68}
69
70__m64 test13(__m64 a) {
71  // CHECK: pabsb
72  return _mm_abs_pi8(a);
73}
74
75__m64 test14(__m64 a) {
76  // CHECK: pabsw
77  return _mm_abs_pi16(a);
78}
79
80__m64 test15(__m64 a) {
81  // CHECK: pabsd
82  return _mm_abs_pi32(a);
83}
84
85__m64 test16(__m64 a, __m64 b) {
86  // CHECK: palignr
87  return _mm_alignr_pi8(a, b, 2);
88}
89
90__m64 test17(__m128d a) {
91  // CHECK: cvtpd2pi
92  return _mm_cvtpd_pi32(a);
93}
94
95__m64 test18(__m128d a) {
96  // CHECK: cvttpd2pi
97  return _mm_cvttpd_pi32(a);
98}
99
100__m128d test19(__m64 a) {
101  // CHECK: cvtpi2pd
102  return _mm_cvtpi32_pd(a);
103}
104
105__m64 test20(__m64 a, __m64 b) {
106  // CHECK: pmuludq
107  return _mm_mul_su32(a, b);
108}
109
110__m64 test21(__m64 a) {
111  // CHECK: pshufw
112  return _mm_shuffle_pi16(a, 3);
113}
114
115__m64 test22(__m64 a, __m64 b) {
116  // CHECK: pmulhuw
117  return _mm_mulhi_pu16(a, b);
118}
119
120void test23(__m64 d, __m64 n, char *p) {
121  // CHECK: maskmovq
122  _mm_maskmove_si64(d, n, p);
123}
124
125int test24(__m64 a) {
126  // CHECK: pmovmskb
127  return _mm_movemask_pi8(a);
128}
129
130void test25(__m64 *p, __m64 a) {
131  // CHECK: movntq
132  _mm_stream_pi(p, a);
133}
134
135__m64 test26(__m64 a, __m64 b) {
136  // CHECK: pavgb
137  return _mm_avg_pu8(a, b);
138}
139
140__m64 test27(__m64 a, __m64 b) {
141  // CHECK: pavgw
142  return _mm_avg_pu16(a, b);
143}
144
145__m64 test28(__m64 a, __m64 b) {
146  // CHECK: pmaxub
147  return _mm_max_pu8(a, b);
148}
149
150__m64 test29(__m64 a, __m64 b) {
151  // CHECK: pmaxsw
152  return _mm_max_pi16(a, b);
153}
154
155__m64 test30(__m64 a, __m64 b) {
156  // CHECK: pminub
157  return _mm_min_pu8(a, b);
158}
159
160__m64 test31(__m64 a, __m64 b) {
161  // CHECK: pminsw
162  return _mm_min_pi16(a, b);
163}
164
165__m64 test32(__m64 a, __m64 b) {
166  // CHECK: psadbw
167  return _mm_sad_pu8(a, b);
168}
169
170__m64 test33(__m64 a, __m64 b) {
171  // CHECK: paddb
172  return _mm_add_pi8(a, b);
173}
174
175__m64 test34(__m64 a, __m64 b) {
176  // CHECK: paddw
177  return _mm_add_pi16(a, b);
178}
179
180__m64 test35(__m64 a, __m64 b) {
181  // CHECK: paddd
182  return _mm_add_pi32(a, b);
183}
184
185__m64 test36(__m64 a, __m64 b) {
186  // CHECK: paddq
187  return __builtin_ia32_paddq(a, b);
188}
189
190__m64 test37(__m64 a, __m64 b) {
191  // CHECK: paddsb
192  return _mm_adds_pi8(a, b);
193}
194
195__m64 test38(__m64 a, __m64 b) {
196  // CHECK: paddsw
197  return _mm_adds_pi16(a, b);
198}
199
200__m64 test39(__m64 a, __m64 b) {
201  // CHECK: paddusb
202  return _mm_adds_pu8(a, b);
203}
204
205__m64 test40(__m64 a, __m64 b) {
206  // CHECK: paddusw
207  return _mm_adds_pu16(a, b);
208}
209
210__m64 test41(__m64 a, __m64 b) {
211  // CHECK: psubb
212  return _mm_sub_pi8(a, b);
213}
214
215__m64 test42(__m64 a, __m64 b) {
216  // CHECK: psubw
217  return _mm_sub_pi16(a, b);
218}
219
220__m64 test43(__m64 a, __m64 b) {
221  // CHECK: psubd
222  return _mm_sub_pi32(a, b);
223}
224
225__m64 test44(__m64 a, __m64 b) {
226  // CHECK: psubq
227  return __builtin_ia32_psubq(a, b);
228}
229
230__m64 test45(__m64 a, __m64 b) {
231  // CHECK: psubsb
232  return _mm_subs_pi8(a, b);
233}
234
235__m64 test46(__m64 a, __m64 b) {
236  // CHECK: psubsw
237  return _mm_subs_pi16(a, b);
238}
239
240__m64 test47(__m64 a, __m64 b) {
241  // CHECK: psubusb
242  return _mm_subs_pu8(a, b);
243}
244
245__m64 test48(__m64 a, __m64 b) {
246  // CHECK: psubusw
247  return _mm_subs_pu16(a, b);
248}
249
250__m64 test49(__m64 a, __m64 b) {
251  // CHECK: pmaddwd
252  return _mm_madd_pi16(a, b);
253}
254
255__m64 test50(__m64 a, __m64 b) {
256  // CHECK: pmulhw
257  return _mm_mulhi_pi16(a, b);
258}
259
260__m64 test51(__m64 a, __m64 b) {
261  // CHECK: pmullw
262  return _mm_mullo_pi16(a, b);
263}
264
265__m64 test52(__m64 a, __m64 b) {
266  // CHECK: pmullw
267  return _mm_mullo_pi16(a, b);
268}
269
270__m64 test53(__m64 a, __m64 b) {
271  // CHECK: pand
272  return _mm_and_si64(a, b);
273}
274
275__m64 test54(__m64 a, __m64 b) {
276  // CHECK: pandn
277  return _mm_andnot_si64(a, b);
278}
279
280__m64 test55(__m64 a, __m64 b) {
281  // CHECK: por
282  return _mm_or_si64(a, b);
283}
284
285__m64 test56(__m64 a, __m64 b) {
286  // CHECK: pxor
287  return _mm_xor_si64(a, b);
288}
289
290__m64 test57(__m64 a, __m64 b) {
291  // CHECK: pavgb
292  return _mm_avg_pu8(a, b);
293}
294
295__m64 test58(__m64 a, __m64 b) {
296  // CHECK: pavgw
297  return _mm_avg_pu16(a, b);
298}
299
300__m64 test59(__m64 a, __m64 b) {
301  // CHECK: psllw
302  return _mm_sll_pi16(a, b);
303}
304
305__m64 test60(__m64 a, __m64 b) {
306  // CHECK: pslld
307  return _mm_sll_pi32(a, b);
308}
309
310__m64 test61(__m64 a, __m64 b) {
311  // CHECK: psllq
312  return _mm_sll_si64(a, b);
313}
314
315__m64 test62(__m64 a, __m64 b) {
316  // CHECK: psrlw
317  return _mm_srl_pi16(a, b);
318}
319
320__m64 test63(__m64 a, __m64 b) {
321  // CHECK: psrld
322  return _mm_srl_pi32(a, b);
323}
324
325__m64 test64(__m64 a, __m64 b) {
326  // CHECK: psrlq
327  return _mm_srl_si64(a, b);
328}
329
330__m64 test65(__m64 a, __m64 b) {
331  // CHECK: psraw
332  return _mm_sra_pi16(a, b);
333}
334
335__m64 test66(__m64 a, __m64 b) {
336  // CHECK: psrad
337  return _mm_sra_pi32(a, b);
338}
339
340__m64 test67(__m64 a) {
341  // CHECK: psllw
342  return _mm_slli_pi16(a, 3);
343}
344
345__m64 test68(__m64 a) {
346  // CHECK: pslld
347  return _mm_slli_pi32(a, 3);
348}
349
350__m64 test69(__m64 a) {
351  // CHECK: psllq
352  return _mm_slli_si64(a, 3);
353}
354
355__m64 test70(__m64 a) {
356  // CHECK: psrlw
357  return _mm_srli_pi16(a, 3);
358}
359
360__m64 test71(__m64 a) {
361  // CHECK: psrld
362  return _mm_srli_pi32(a, 3);
363}
364
365__m64 test72(__m64 a) {
366  // CHECK: psrlq
367  return _mm_srli_si64(a, 3);
368}
369
370__m64 test73(__m64 a) {
371  // CHECK: psraw
372  return _mm_srai_pi16(a, 3);
373}
374
375__m64 test74(__m64 a) {
376  // CHECK: psrad
377  return _mm_srai_pi32(a, 3);
378}
379
380__m64 test75(__m64 a, __m64 b) {
381  // CHECK: packsswb
382  return _mm_packs_pi16(a, b);
383}
384
385__m64 test76(__m64 a, __m64 b) {
386  // CHECK: packssdw
387  return _mm_packs_pi32(a, b);
388}
389
390__m64 test77(__m64 a, __m64 b) {
391  // CHECK: packuswb
392  return _mm_packs_pu16(a, b);
393}
394
395__m64 test78(__m64 a, __m64 b) {
396  // CHECK: punpckhbw
397  return _mm_unpackhi_pi8(a, b);
398}
399
400__m64 test79(__m64 a, __m64 b) {
401  // CHECK: punpckhwd
402  return _mm_unpackhi_pi16(a, b);
403}
404
405__m64 test80(__m64 a, __m64 b) {
406  // CHECK: punpckhdq
407  return _mm_unpackhi_pi32(a, b);
408}
409
410__m64 test81(__m64 a, __m64 b) {
411  // CHECK: punpcklbw
412  return _mm_unpacklo_pi8(a, b);
413}
414
415__m64 test82(__m64 a, __m64 b) {
416  // CHECK: punpcklwd
417  return _mm_unpacklo_pi16(a, b);
418}
419
420__m64 test83(__m64 a, __m64 b) {
421  // CHECK: punpckldq
422  return _mm_unpacklo_pi32(a, b);
423}
424
425__m64 test84(__m64 a, __m64 b) {
426  // CHECK: pcmpeqb
427  return _mm_cmpeq_pi8(a, b);
428}
429
430__m64 test85(__m64 a, __m64 b) {
431  // CHECK: pcmpeqw
432  return _mm_cmpeq_pi16(a, b);
433}
434
435__m64 test86(__m64 a, __m64 b) {
436  // CHECK: pcmpeqd
437  return _mm_cmpeq_pi32(a, b);
438}
439
440__m64 test87(__m64 a, __m64 b) {
441  // CHECK: pcmpgtb
442  return _mm_cmpgt_pi8(a, b);
443}
444
445__m64 test88(__m64 a, __m64 b) {
446  // CHECK: pcmpgtw
447  return _mm_cmpgt_pi16(a, b);
448}
449
450__m64 test89(__m64 a, __m64 b) {
451  // CHECK: pcmpgtd
452  return _mm_cmpgt_pi32(a, b);
453}
454