; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
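
; The intrinsic nominally matches the v_bfe_u32 machine op: for a nonzero
; width it extracts (src >> (offset & 31)) & ((1 << (width & 31)) - 1),
; and a zero width folds to 0. Most of the constant-fold checks below
; follow from this formula, e.g. (160 >> 4) & 0xf == 10 in
; bfe_u32_constant_fold_test_11.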

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
; SI: buffer_load_ubyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; SI: buffer_load_dword
; SI: v_add_i32
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_1:
; SI: buffer_load_dword
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

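; FUNC-LABEL: {{^}}bfe_u32_test_2:
; SI: s_endpgm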
define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

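; FUNC-LABEL: {{^}}bfe_u32_test_3:
; SI: s_endpgm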
define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_4:
; SI-NOT: lshl
; SI-NOT: shr
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_5:
; SI: buffer_load_dword
; SI-NOT: lshl
; SI-NOT: shr
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_6:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_7:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_13:
; v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shr = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_14:
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shr = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure SimplifyDemandedBits doesn't reduce the and to only the bits
; demanded by the bfe: the bfe reads just bits [3:2] of %and, but %and is
; also stored, so the full 63 mask must be preserved.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; SI: buffer_load_dword [[ARG:v[0-9]+]]
; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: buffer_store_dword [[AND]]
; SI-DAG: buffer_store_dword [[BFE]]
; SI: s_endpgm
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                            i32 addrspace(1)* %out1,
                                            i32 addrspace(1)* %in) nounwind {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

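; s_bfe_u32 packs its field descriptor as (width << 16) | offset, so the
; 0x30006 below selects a 3-bit field starting at bit 6, and 0x150002 a
; 21-bit field starting at bit 2.
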
; FUNC-LABEL: {{^}}lshr_and:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @lshr_and(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_lshr_and:
; SI: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; SI: buffer_store_dword
define void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}and_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}and_lshr2:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr2(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}shl_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; SI: buffer_store_dword
define void @shl_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}