; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=pentium4 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=yonah | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE3 %s
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3 %s
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=penryn | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42 %s
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 %s
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 %s
; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 %s

; Module-level target description: explicit data layout string and the
; x86-64 macOS triple this module is written for.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Vector compare cost test. Each fcmp/icmp below is preceded by the cost the
; cost-model analysis is expected to print for it, one directive per subtarget
; prefix enabled by the run lines at the top of this file. All compare operands
; are undef and %arg is not used.
;
; NOTE(review): the 128/256-bit directives only match "fcmp"/"icmp" and rely on
; FileCheck's in-order matching, whereas the 512-bit-and-wider directives also
; name the value (%E1, %M1, ...). Anchoring the former to their value names
; would make the test stricter -- confirm the expected costs before doing so.
define i32 @cmp(i32 %arg) {
  ;  -- floats --
  ; <2 x float>
  ;SSE2:  cost of 3 {{.*}} fcmp
  ;SSE3:  cost of 3 {{.*}} fcmp
  ;SSSE3: cost of 3 {{.*}} fcmp
  ;SSE41: cost of 3 {{.*}} fcmp
  ;SSE42: cost of 1 {{.*}} fcmp
  ;AVX:   cost of 1 {{.*}} fcmp
  %A = fcmp olt <2 x float> undef, undef

  ; <4 x float>
  ;SSE2:  cost of 7 {{.*}} fcmp
  ;SSE3:  cost of 7 {{.*}} fcmp
  ;SSSE3: cost of 7 {{.*}} fcmp
  ;SSE41: cost of 7 {{.*}} fcmp
  ;SSE42: cost of 1 {{.*}} fcmp
  ;AVX:   cost of 1 {{.*}} fcmp
  %B = fcmp olt <4 x float> undef, undef

  ; <8 x float>
  ;SSE2:  cost of 14 {{.*}} fcmp
  ;SSE3:  cost of 14 {{.*}} fcmp
  ;SSSE3: cost of 14 {{.*}} fcmp
  ;SSE41: cost of 14 {{.*}} fcmp
  ;SSE42: cost of 2 {{.*}} fcmp
  ;AVX:   cost of 1 {{.*}} fcmp
  %C = fcmp olt <8 x float> undef, undef

  ; <2 x double>
  ;SSE2:  cost of 3 {{.*}} fcmp
  ;SSE3:  cost of 3 {{.*}} fcmp
  ;SSSE3: cost of 3 {{.*}} fcmp
  ;SSE41: cost of 3 {{.*}} fcmp
  ;SSE42: cost of 1 {{.*}} fcmp
  ;AVX:   cost of 1 {{.*}} fcmp
  %D = fcmp olt <2 x double> undef, undef

  ; <4 x double>
  ;SSE2:  cost of 6 {{.*}} fcmp
  ;SSE3:  cost of 6 {{.*}} fcmp
  ;SSSE3: cost of 6 {{.*}} fcmp
  ;SSE41: cost of 6 {{.*}} fcmp
  ;SSE42: cost of 2 {{.*}} fcmp
  ;AVX:   cost of 1 {{.*}} fcmp
  %E = fcmp olt <4 x double> undef, undef

  ; 512-bit and wider floating-point vectors, checked for the knl run only.
  ; AVX512: cost of 1 {{.*}} %E1 = fcmp
  %E1 = fcmp olt <16 x float> undef, undef

  ; AVX512: cost of 1 {{.*}} %E2 = fcmp
  %E2 = fcmp olt <8 x double> undef, undef

  ; AVX512: cost of 2 {{.*}} %E3 = fcmp
  %E3 = fcmp olt <16 x double> undef, undef

  ;  -- integers --

  ; <16 x i8>
  ;SSE2:  cost of 1 {{.*}} icmp
  ;SSE3:  cost of 1 {{.*}} icmp
  ;SSSE3: cost of 1 {{.*}} icmp
  ;SSE41: cost of 1 {{.*}} icmp
  ;SSE42: cost of 1 {{.*}} icmp
  ;AVX:   cost of 1 {{.*}} icmp
  %F = icmp eq <16 x i8> undef, undef

  ; <8 x i16>
  ;SSE2:  cost of 1 {{.*}} icmp
  ;SSE3:  cost of 1 {{.*}} icmp
  ;SSSE3: cost of 1 {{.*}} icmp
  ;SSE41: cost of 1 {{.*}} icmp
  ;SSE42: cost of 1 {{.*}} icmp
  ;AVX:   cost of 1 {{.*}} icmp
  %G = icmp eq <8 x i16> undef, undef

  ; <4 x i32>
  ;SSE2:  cost of 1 {{.*}} icmp
  ;SSE3:  cost of 1 {{.*}} icmp
  ;SSSE3: cost of 1 {{.*}} icmp
  ;SSE41: cost of 1 {{.*}} icmp
  ;SSE42: cost of 1 {{.*}} icmp
  ;AVX:   cost of 1 {{.*}} icmp
  %H = icmp eq <4 x i32> undef, undef

  ; <2 x i64>
  ;SSE2:  cost of 8 {{.*}} icmp
  ;SSE3:  cost of 8 {{.*}} icmp
  ;SSSE3: cost of 8 {{.*}} icmp
  ;SSE41: cost of 8 {{.*}} icmp
  ;SSE42: cost of 1 {{.*}} icmp
  ;AVX:   cost of 1 {{.*}} icmp
  %I = icmp eq <2 x i64> undef, undef

  ; <4 x i64> -- from here on the two AVX runs expect different costs.
  ;SSE2:  cost of 16 {{.*}} icmp
  ;SSE3:  cost of 16 {{.*}} icmp
  ;SSSE3: cost of 16 {{.*}} icmp
  ;SSE41: cost of 16 {{.*}} icmp
  ;SSE42: cost of 2 {{.*}} icmp
  ;AVX1:  cost of 4 {{.*}} icmp
  ;AVX2:  cost of 1 {{.*}} icmp
  %J = icmp eq <4 x i64> undef, undef

  ; <8 x i32>
  ;SSE2:  cost of 2 {{.*}} icmp
  ;SSE3:  cost of 2 {{.*}} icmp
  ;SSSE3: cost of 2 {{.*}} icmp
  ;SSE41: cost of 2 {{.*}} icmp
  ;SSE42: cost of 2 {{.*}} icmp
  ;AVX1:  cost of 4 {{.*}} icmp
  ;AVX2:  cost of 1 {{.*}} icmp
  %K = icmp eq <8 x i32> undef, undef

  ; <16 x i16>
  ;SSE2:  cost of 2 {{.*}} icmp
  ;SSE3:  cost of 2 {{.*}} icmp
  ;SSSE3: cost of 2 {{.*}} icmp
  ;SSE41: cost of 2 {{.*}} icmp
  ;SSE42: cost of 2 {{.*}} icmp
  ;AVX1:  cost of 4 {{.*}} icmp
  ;AVX2:  cost of 1 {{.*}} icmp
  %L = icmp eq <16 x i16> undef, undef

  ; <32 x i8>
  ;SSE2:  cost of 2 {{.*}} icmp
  ;SSE3:  cost of 2 {{.*}} icmp
  ;SSSE3: cost of 2 {{.*}} icmp
  ;SSE41: cost of 2 {{.*}} icmp
  ;SSE42: cost of 2 {{.*}} icmp
  ;AVX1:  cost of 4 {{.*}} icmp
  ;AVX2:  cost of 1 {{.*}} icmp
  %M = icmp eq <32 x i8> undef, undef

  ; 512-bit and wider integer vectors, checked for the knl run only.
  ; AVX512: cost of 1 {{.*}} %M1 = icmp
  %M1 = icmp eq <16 x i32> undef, undef

  ; AVX512: cost of 1 {{.*}} %M2 = icmp
  %M2 = icmp eq <8 x i64> undef, undef

  ; AVX512: cost of 2 {{.*}} %M3 = icmp
  %M3 = icmp eq <16 x i64> undef, undef

  ; The return is expected to cost nothing in every run.
  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}