1; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
2target datalayout = "E-m:e-i64:64-n32:64"
3target triple = "powerpc64-unknown-linux-gnu"
4
; test1: a scalar double fmul is expected to select the VSX xsmuldp instruction.
; CHECK-LABEL: @test1
; CHECK: xsmuldp 1, 1, 2
; CHECK: blr
define double @test1(double %a, double %b) {
entry:
  %prod = fmul double %a, %b
  ret double %prod
}
14
; test2: a scalar double fdiv is expected to select the VSX xsdivdp instruction.
; CHECK-LABEL: @test2
; CHECK: xsdivdp 1, 1, 2
; CHECK: blr
define double @test2(double %a, double %b) {
entry:
  %quot = fdiv double %a, %b
  ret double %quot
}
24
; test3: a scalar double fadd is expected to select the VSX xsadddp instruction.
; CHECK-LABEL: @test3
; CHECK: xsadddp 1, 1, 2
; CHECK: blr
define double @test3(double %a, double %b) {
entry:
  %sum = fadd double %a, %b
  ret double %sum
}
34
; test4: a <2 x double> fadd is expected to select the vector xvadddp instruction.
; CHECK-LABEL: @test4
; CHECK: xvadddp 34, 34, 35
; CHECK: blr
define <2 x double> @test4(<2 x double> %a, <2 x double> %b) {
entry:
  %sum = fadd <2 x double> %a, %b
  ret <2 x double> %sum
}
44
; test5: xor of <4 x i32> vectors is expected to select xxlxor.
; CHECK-LABEL: @test5
; CHECK: xxlxor 34, 34, 35
; CHECK: blr
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
entry:
  %xored = xor <4 x i32> %a, %b
  ret <4 x i32> %xored
}
54
; test6: xor of <8 x i16> vectors is expected to select xxlxor.
; CHECK-LABEL: @test6
; CHECK: xxlxor 34, 34, 35
; CHECK: blr
define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
entry:
  %xored = xor <8 x i16> %a, %b
  ret <8 x i16> %xored
}
64
; test7: xor of <16 x i8> vectors is expected to select xxlxor.
; CHECK-LABEL: @test7
; CHECK: xxlxor 34, 34, 35
; CHECK: blr
define <16 x i8> @test7(<16 x i8> %a, <16 x i8> %b) {
entry:
  %xored = xor <16 x i8> %a, %b
  ret <16 x i8> %xored
}
74
; test8: or of <4 x i32> vectors is expected to select xxlor.
; CHECK-LABEL: @test8
; CHECK: xxlor 34, 34, 35
; CHECK: blr
define <4 x i32> @test8(<4 x i32> %a, <4 x i32> %b) {
entry:
  %ored = or <4 x i32> %a, %b
  ret <4 x i32> %ored
}
84
; test9: or of <8 x i16> vectors is expected to select xxlor.
; CHECK-LABEL: @test9
; CHECK: xxlor 34, 34, 35
; CHECK: blr
define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
entry:
  %ored = or <8 x i16> %a, %b
  ret <8 x i16> %ored
}
94
; test10: or of <16 x i8> vectors is expected to select xxlor.
; CHECK-LABEL: @test10
; CHECK: xxlor 34, 34, 35
; CHECK: blr
define <16 x i8> @test10(<16 x i8> %a, <16 x i8> %b) {
entry:
  %ored = or <16 x i8> %a, %b
  ret <16 x i8> %ored
}
104
; test11: and of <4 x i32> vectors is expected to select xxland.
; CHECK-LABEL: @test11
; CHECK: xxland 34, 34, 35
; CHECK: blr
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
entry:
  %anded = and <4 x i32> %a, %b
  ret <4 x i32> %anded
}
114
; test12: and of <8 x i16> vectors is expected to select xxland.
; CHECK-LABEL: @test12
; CHECK: xxland 34, 34, 35
; CHECK: blr
define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
entry:
  %anded = and <8 x i16> %a, %b
  ret <8 x i16> %anded
}
124
; test13: and of <16 x i8> vectors is expected to select xxland.
; CHECK-LABEL: @test13
; CHECK: xxland 34, 34, 35
; CHECK: blr
define <16 x i8> @test13(<16 x i8> %a, <16 x i8> %b) {
entry:
  %anded = and <16 x i8> %a, %b
  ret <16 x i8> %anded
}
134
; test14: or followed by xor with all-ones (<4 x i32>) is expected to fold
; into a single xxlnor.
; CHECK-LABEL: @test14
; CHECK: xxlnor 34, 34, 35
; CHECK: blr
define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) {
entry:
  %ored = or <4 x i32> %a, %b
  %nor = xor <4 x i32> %ored, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %nor
}
145
; test15: or followed by xor with all-ones (<8 x i16>) is expected to fold
; into a single xxlnor.
; CHECK-LABEL: @test15
; CHECK: xxlnor 34, 34, 35
; CHECK: blr
define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
entry:
  %ored = or <8 x i16> %a, %b
  %nor = xor <8 x i16> %ored, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %nor
}
156
; test16: or followed by xor with all-ones (<16 x i8>) is expected to fold
; into a single xxlnor.
; CHECK-LABEL: @test16
; CHECK: xxlnor 34, 34, 35
; CHECK: blr
define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
entry:
  %ored = or <16 x i8> %a, %b
  %nor = xor <16 x i8> %ored, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %nor
}
167
; test17: %a and-ed with the complement of %b (<4 x i32>) is expected to
; select xxlandc.
; CHECK-LABEL: @test17
; CHECK: xxlandc 34, 34, 35
; CHECK: blr
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
entry:
  %notb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
  %andc = and <4 x i32> %a, %notb
  ret <4 x i32> %andc
}
178
; test18: %a and-ed with the complement of %b (<8 x i16>) is expected to
; select xxlandc.
; CHECK-LABEL: @test18
; CHECK: xxlandc 34, 34, 35
; CHECK: blr
define <8 x i16> @test18(<8 x i16> %a, <8 x i16> %b) {
entry:
  %notb = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %andc = and <8 x i16> %a, %notb
  ret <8 x i16> %andc
}
189
; test19: %a and-ed with the complement of %b (<16 x i8>) is expected to
; select xxlandc.
; CHECK-LABEL: @test19
; CHECK: xxlandc 34, 34, 35
; CHECK: blr
define <16 x i8> @test19(<16 x i8> %a, <16 x i8> %b) {
entry:
  %notb = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %andc = and <16 x i8> %a, %notb
  ret <16 x i8> %andc
}
200
; test20: select on an <4 x i32> equality mask. The compare is expected to be
; a vcmpequw on %c/%d and the select an xxsel whose last operand is the mask.
; CHECK-LABEL: @test20
; CHECK: vcmpequw {{[0-9]+}}, 4, 5
; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
; CHECK: blr
define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
entry:
  %mask = icmp eq <4 x i32> %c, %d
  %sel = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %sel
}
212
; test21: select on an ordered-equal <4 x float> mask. A single xvcmpeqsp is
; expected, and its result register must be the mask operand of xxsel.
; CHECK-LABEL: @test21
; CHECK: xvcmpeqsp [[V1:[0-9]+]], 36, 37
; CHECK: xxsel 34, 35, 34, [[V1]]
; CHECK: blr
define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
entry:
  %mask = fcmp oeq <4 x float> %c, %d
  %sel = select <4 x i1> %mask, <4 x float> %a, <4 x float> %b
  ret <4 x float> %sel
}
224
; test22: select on an unordered-or-equal <4 x float> mask. The expected code
; contains, in any order, three xvcmpeqsp (each operand against itself plus
; %c against %d), two xxlnor and two xxlor, followed by the xxsel.
; CHECK-LABEL: @test22
; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37
; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36
; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37
; CHECK-DAG: xxlnor
; CHECK-DAG: xxlnor
; CHECK-DAG: xxlor
; CHECK-DAG: xxlor
; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
; CHECK: blr
define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
entry:
  %mask = fcmp ueq <4 x float> %c, %d
  %sel = select <4 x i1> %mask, <4 x float> %a, <4 x float> %b
  ret <4 x float> %sel
}
242
; test23: select on an <8 x i16> equality mask. The compare is expected to be
; a vcmpequh on %c/%d, followed by an xxsel.
; CHECK-LABEL: @test23
; CHECK: vcmpequh {{[0-9]+}}, 4, 5
; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
; CHECK: blr
define <8 x i16> @test23(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
entry:
  %mask = icmp eq <8 x i16> %c, %d
  %sel = select <8 x i1> %mask, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %sel
}
254
; test24: select on an <16 x i8> equality mask. The compare is expected to be
; a vcmpequb on %c/%d, followed by an xxsel.
; CHECK-LABEL: @test24
; CHECK: vcmpequb {{[0-9]+}}, 4, 5
; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
; CHECK: blr
define <16 x i8> @test24(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
entry:
  %mask = icmp eq <16 x i8> %c, %d
  %sel = select <16 x i1> %mask, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %sel
}
266
; test25: select on an ordered-equal <2 x double> mask. A single xvcmpeqdp is
; expected, and its result register must be the mask operand of xxsel.
; CHECK-LABEL: @test25
; CHECK: xvcmpeqdp [[V1:[0-9]+]], 36, 37
; CHECK: xxsel 34, 35, 34, [[V1]]
; CHECK: blr
define <2 x double> @test25(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
entry:
  %mask = fcmp oeq <2 x double> %c, %d
  %sel = select <2 x i1> %mask, <2 x double> %a, <2 x double> %b
  ret <2 x double> %sel
}
278
; test26: <2 x i64> add. No vector add instruction is matched here; the
; directives below only pin the number of vector stores and require two
; later matches of "add".
; CHECK-LABEL: @test26

; Make sure we use only two stores (one for each operand).
; CHECK: stxvd2x 35,
; CHECK: stxvd2x 34,
; CHECK-NOT: stxvd2x

; FIXME: The code quality here is not good; just make sure we do something for now.
; CHECK: add
; CHECK: add
; CHECK: blr
define <2 x i64> @test26(<2 x i64> %a, <2 x i64> %b) {
  %sum = add <2 x i64> %a, %b
  ret <2 x i64> %sum
}
295
; test27: and of <2 x i64> vectors is expected to select xxland.
; CHECK-LABEL: @test27
; CHECK: xxland 34, 34, 35
; CHECK: blr
define <2 x i64> @test27(<2 x i64> %a, <2 x i64> %b) {
  %anded = and <2 x i64> %a, %b
  ret <2 x i64> %anded
}
304
; test28: a 16-byte-aligned <2 x double> load is expected to select lxvd2x.
; CHECK-LABEL: @test28
; CHECK: lxvd2x 34, 0, 3
; CHECK: blr
define <2 x double> @test28(<2 x double>* %a) {
  %vec = load <2 x double>* %a, align 16
  ret <2 x double> %vec
}
313
; test29: a 16-byte-aligned <2 x double> store is expected to select stxvd2x.
; CHECK-LABEL: @test29
; CHECK: stxvd2x 34, 0, 3
; CHECK: blr
define void @test29(<2 x double>* %a, <2 x double> %b) {
  store <2 x double> %b, <2 x double>* %a, align 16
  ret void
}
322
; test28u: same as test28 but with only 8-byte alignment; lxvd2x is still
; the expected load instruction.
; CHECK-LABEL: @test28u
; CHECK: lxvd2x 34, 0, 3
; CHECK: blr
define <2 x double> @test28u(<2 x double>* %a) {
  %vec = load <2 x double>* %a, align 8
  ret <2 x double> %vec
}
331
; test29u: same as test29 but with only 8-byte alignment; stxvd2x is still
; the expected store instruction.
; CHECK-LABEL: @test29u
; CHECK: stxvd2x 34, 0, 3
; CHECK: blr
define void @test29u(<2 x double>* %a, <2 x double> %b) {
  store <2 x double> %b, <2 x double>* %a, align 8
  ret void
}
340
; test30: a 16-byte-aligned <2 x i64> load is expected to select lxvd2x.
; CHECK-LABEL: @test30
; CHECK: lxvd2x 34, 0, 3
; CHECK: blr
define <2 x i64> @test30(<2 x i64>* %a) {
  %vec = load <2 x i64>* %a, align 16
  ret <2 x i64> %vec
}
349
; test31: a 16-byte-aligned <2 x i64> store is expected to select stxvd2x.
; CHECK-LABEL: @test31
; CHECK: stxvd2x 34, 0, 3
; CHECK: blr
define void @test31(<2 x i64>* %a, <2 x i64> %b) {
  store <2 x i64> %b, <2 x i64>* %a, align 16
  ret void
}
358
; test40: uitofp <2 x i64> -> <2 x double> is expected to select xvcvuxddp.
; CHECK-LABEL: @test40
; CHECK: xvcvuxddp 34, 34
; CHECK: blr
define <2 x double> @test40(<2 x i64> %a) {
  %conv = uitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %conv
}
367
; test41: sitofp <2 x i64> -> <2 x double> is expected to select xvcvsxddp.
; CHECK-LABEL: @test41
; CHECK: xvcvsxddp 34, 34
; CHECK: blr
define <2 x double> @test41(<2 x i64> %a) {
  %conv = sitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %conv
}
376
; test42: fptoui <2 x double> -> <2 x i64> is expected to select xvcvdpuxds.
; CHECK-LABEL: @test42
; CHECK: xvcvdpuxds 34, 34
; CHECK: blr
define <2 x i64> @test42(<2 x double> %a) {
  %conv = fptoui <2 x double> %a to <2 x i64>
  ret <2 x i64> %conv
}
385
; test43: fptosi <2 x double> -> <2 x i64> is expected to select xvcvdpsxds.
; CHECK-LABEL: @test43
; CHECK: xvcvdpsxds 34, 34
; CHECK: blr
define <2 x i64> @test43(<2 x double> %a) {
  %conv = fptosi <2 x double> %a to <2 x i64>
  ret <2 x i64> %conv
}
394
; test44: uitofp <2 x i64> -> <2 x float>. Only successful code generation
; (reaching the return) is verified.
; CHECK-LABEL: @test44
; FIXME: The code quality here looks pretty bad.
; CHECK: blr
define <2 x float> @test44(<2 x i64> %a) {
  %conv = uitofp <2 x i64> %a to <2 x float>
  ret <2 x float> %conv
}
403
; test45: sitofp <2 x i64> -> <2 x float>. Only successful code generation
; (reaching the return) is verified.
; CHECK-LABEL: @test45
; FIXME: The code quality here looks pretty bad.
; CHECK: blr
define <2 x float> @test45(<2 x i64> %a) {
  %conv = sitofp <2 x i64> %a to <2 x float>
  ret <2 x float> %conv
}
412
; test46: fptoui <2 x float> -> <2 x i64>. Only successful code generation
; (reaching the return) is verified.
; CHECK-LABEL: @test46
; FIXME: The code quality here looks pretty bad.
; CHECK: blr
define <2 x i64> @test46(<2 x float> %a) {
  %conv = fptoui <2 x float> %a to <2 x i64>
  ret <2 x i64> %conv
}
421
; test47: fptosi <2 x float> -> <2 x i64>. Only successful code generation
; (reaching the return) is verified.
; CHECK-LABEL: @test47
; FIXME: The code quality here looks pretty bad.
; CHECK: blr
define <2 x i64> @test47(<2 x float> %a) {
  %conv = fptosi <2 x float> %a to <2 x i64>
  ret <2 x i64> %conv
}
430
; test50: a scalar double loaded from memory and inserted into both lanes of
; a <2 x double> is expected to become a single lxvdsx.
; CHECK-LABEL: @test50
; CHECK: lxvdsx 34, 0, 3
; CHECK: blr
define <2 x double> @test50(double* %a) {
  %scalar = load double* %a, align 8
  %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %splat = insertelement <2 x double> %lane0, double %scalar, i32 1
  ret <2 x double> %splat
}
441
; test51: shuffle mask <0, 0> is expected to lower to one xxpermdi of %a with
; itself and immediate 0.
; CHECK-LABEL: @test51
; CHECK: xxpermdi 34, 34, 34, 0
; CHECK: blr
define <2 x double> @test51(<2 x double> %a, <2 x double> %b) {
  %shuf = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuf
}
450
; test52: shuffle mask <0, 2> is expected to lower to one xxpermdi of %a, %b
; with immediate 0.
; CHECK-LABEL: @test52
; CHECK: xxpermdi 34, 34, 35, 0
; CHECK: blr
define <2 x double> @test52(<2 x double> %a, <2 x double> %b) {
  %shuf = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %shuf
}
459
; test53: shuffle mask <2, 0> is expected to lower to one xxpermdi with the
; source operands swapped (%b, %a) and immediate 0.
; CHECK-LABEL: @test53
; CHECK: xxpermdi 34, 35, 34, 0
; CHECK: blr
define <2 x double> @test53(<2 x double> %a, <2 x double> %b) {
  %shuf = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuf
}
468
; test54: shuffle mask <1, 2> is expected to lower to one xxpermdi of %a, %b
; with immediate 2.
; CHECK-LABEL: @test54
; CHECK: xxpermdi 34, 34, 35, 2
; CHECK: blr
define <2 x double> @test54(<2 x double> %a, <2 x double> %b) {
  %shuf = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x double> %shuf
}
477
; test55: shuffle mask <1, 3> is expected to lower to one xxpermdi of %a, %b
; with immediate 3.
; CHECK-LABEL: @test55
; CHECK: xxpermdi 34, 34, 35, 3
; CHECK: blr
define <2 x double> @test55(<2 x double> %a, <2 x double> %b) {
  %shuf = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %shuf
}
486
; test56: the <2 x i64> form of test55; shuffle mask <1, 3> is expected to
; lower to one xxpermdi of %a, %b with immediate 3.
; CHECK-LABEL: @test56
; CHECK: xxpermdi 34, 34, 35, 3
; CHECK: blr
define <2 x i64> @test56(<2 x i64> %a, <2 x i64> %b) {
  %shuf = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %shuf
}
495
; test60: <2 x i64> shl. Expected shape: two vector stores, two scalar sld,
; then one vector reload.
; CHECK-LABEL: @test60
; This should scalarize, and the current code quality is not good.
; CHECK: stxvd2x
; CHECK: stxvd2x
; CHECK: sld
; CHECK: sld
; CHECK: lxvd2x
; CHECK: blr
define <2 x i64> @test60(<2 x i64> %a, <2 x i64> %b) {
  %shifted = shl <2 x i64> %a, %b
  ret <2 x i64> %shifted
}
509
; test61: <2 x i64> lshr. Expected shape: two vector stores, two scalar srd,
; then one vector reload.
; CHECK-LABEL: @test61
; This should scalarize, and the current code quality is not good.
; CHECK: stxvd2x
; CHECK: stxvd2x
; CHECK: srd
; CHECK: srd
; CHECK: lxvd2x
; CHECK: blr
define <2 x i64> @test61(<2 x i64> %a, <2 x i64> %b) {
  %shifted = lshr <2 x i64> %a, %b
  ret <2 x i64> %shifted
}
523
; test62: <2 x i64> ashr. Expected shape: two vector stores, two scalar srad,
; then one vector reload.
; CHECK-LABEL: @test62
; This should scalarize, and the current code quality is not good.
; CHECK: stxvd2x
; CHECK: stxvd2x
; CHECK: srad
; CHECK: srad
; CHECK: lxvd2x
; CHECK: blr
define <2 x i64> @test62(<2 x i64> %a, <2 x i64> %b) {
  %shifted = ashr <2 x i64> %a, %b
  ret <2 x i64> %shifted
}
537
; test63: extracting element 0 of a <2 x double> is expected to be a plain
; register copy done with xxlor.
; CHECK-LABEL: @test63
; CHECK: xxlor 1, 34, 34
; CHECK: blr
define double @test63(<2 x double> %a) {
  %elt = extractelement <2 x double> %a, i32 0
  ret double %elt
}
546
; test64: extracting element 1 of a <2 x double> is expected to use xxpermdi
; with immediate 2.
; CHECK-LABEL: @test64
; CHECK: xxpermdi 1, 34, 34, 2
; CHECK: blr
define double @test64(<2 x double> %a) {
  %elt = extractelement <2 x double> %a, i32 1
  ret double %elt
}
555
; test65: icmp eq on <2 x i64> returning the <2 x i1> mask; the expected
; compare instruction is vcmpequw.
; CHECK-LABEL: @test65
; CHECK: vcmpequw 2, 2, 3
; CHECK: blr
define <2 x i1> @test65(<2 x i64> %a, <2 x i64> %b) {
  %cmp = icmp eq <2 x i64> %a, %b
  ret <2 x i1> %cmp
}
564
; test66: icmp ne on <2 x i64>; expected as a vcmpequw followed by an xxlnor
; that writes the result register.
; CHECK-LABEL: @test66
; CHECK: vcmpequw {{[0-9]+}}, 2, 3
; CHECK: xxlnor 34, {{[0-9]+}}, {{[0-9]+}}
; CHECK: blr
define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
  %cmp = icmp ne <2 x i64> %a, %b
  ret <2 x i1> %cmp
}
574
; test67: icmp ult on <2 x i64>. Expected shape: two vector stores, two
; scalar cmpld, then one vector reload.
; CHECK-LABEL: @test67
; This should scalarize, and the current code quality is not good.
; CHECK: stxvd2x
; CHECK: stxvd2x
; CHECK: cmpld
; CHECK: cmpld
; CHECK: lxvd2x
; CHECK: blr
define <2 x i1> @test67(<2 x i64> %a, <2 x i64> %b) {
  %cmp = icmp ult <2 x i64> %a, %b
  ret <2 x i1> %cmp
}
588
; test68: sitofp <2 x i32> -> <2 x double>; expected as an xxsldwi whose
; result feeds xvcvsxwdp.
; CHECK-LABEL: @test68
; CHECK: xxsldwi [[V1:[0-9]+]], 34, 34, 1
; CHECK: xvcvsxwdp 34, [[V1]]
; CHECK: blr
define <2 x double> @test68(<2 x i32> %a) {
  %conv = sitofp <2 x i32> %a to <2 x double>
  ret <2 x double> %conv
}
598
; test69: sitofp <2 x i16> -> <2 x double>. The expected sequence builds a
; shift amount from vspltisw 8 added to itself, applies vslw then vsraw with
; that amount, and finishes with xxsldwi + xvcvsxwdp.
; CHECK-LABEL: @test69
; CHECK: vspltisw [[V1:[0-9]+]], 8
; CHECK: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
; CHECK: vslw [[V3:[0-9]+]], 2, [[V2]]
; CHECK: vsraw {{[0-9]+}}, [[V3]], [[V2]]
; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
; CHECK: xvcvsxwdp 34, [[V4]]
; CHECK: blr
define <2 x double> @test69(<2 x i16> %a) {
  %conv = sitofp <2 x i16> %a to <2 x double>
  ret <2 x double> %conv
}
612
; test70: sitofp <2 x i8> -> <2 x double>. The expected sequence builds a
; shift amount from vspltisw 12 added to itself, applies vslw then vsraw with
; that amount, and finishes with xxsldwi + xvcvsxwdp.
; CHECK-LABEL: @test70
; CHECK: vspltisw [[V1:[0-9]+]], 12
; CHECK: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
; CHECK: vslw [[V3:[0-9]+]], 2, [[V2]]
; CHECK: vsraw {{[0-9]+}}, [[V3]], [[V2]]
; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
; CHECK: xvcvsxwdp 34, [[V4]]
; CHECK: blr
define <2 x double> @test70(<2 x i8> %a) {
  %conv = sitofp <2 x i8> %a to <2 x double>
  ret <2 x double> %conv
}
626
; test80: splat an i32 into <2 x i32> and add the constant <2, 3>. The
; expected code computes both lanes with scalar addi, writes them with std,
; reloads the vector with one lxvd2x, and emits no stxvd2x.
; CHECK-LABEL: @test80
; CHECK-DAG: addi [[R1:[0-9]+]], 3, 3
; CHECK-DAG: addi [[R2:[0-9]+]], 1, -16
; CHECK-DAG: addi [[R3:[0-9]+]], 3, 2
; CHECK: std [[R1]], 8([[R2]])
; CHECK: std [[R3]], -16(1)
; CHECK: lxvd2x 34, 0, [[R2]]
; CHECK-NOT: stxvd2x
; CHECK: blr
define <2 x i32> @test80(i32 %v) {
  %ins = insertelement <2 x i32> undef, i32 %v, i32 0
  %splat = shufflevector <2 x i32> %ins, <2 x i32> undef, <2 x i32> zeroinitializer
  %sum = add <2 x i32> %splat, <i32 2, i32 3>
  ret <2 x i32> %sum
}
643
; test81: bitcast <4 x float> -> <2 x double>. Only successful code
; generation (reaching the return) is verified.
; CHECK-LABEL: @test81
; CHECK: blr
define <2 x double> @test81(<4 x float> %b) {
  %cast = bitcast <4 x float> %b to <2 x double>
  ret <2 x double> %cast
}
651
652