; Test the saving and restoring of FPRs in large frames.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s

; Test a frame size that requires some FPRs to be saved and loaded using
; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
; The frame is big enough to require two emergency spill slots at 160(%r15),
; as well as the 8 FPR save slots.  Get a frame of size 4128 by allocating
; (4128 - 176 - 8 * 8) / 8 = 486 extra doublewords.
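;
; For reference, a rough breakdown of the 4128-byte frame assumed by the
; checks below: the 160-byte ABI-reserved area, 16 bytes for the two
; emergency spill slots, 64 bytes for the FPR save slots and 3888 bytes
; (486 doublewords) for %y.  STD/LD take a 12-bit unsigned displacement
; (0..4095), so the four highest save slots (4096..4120) need the
; long-displacement STDY/LDY while the other four (4064..4088) fit the
; short form.  The CFA is the incoming %r15 plus 160, hence the CFA
; offset of 4288 and the %f8 slot at 4120(%r15) being CFA-168.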
define void @f1(double *%ptr, i64 %x) {
; CHECK-NOFP-LABEL: f1:
; CHECK-NOFP: aghi %r15, -4128
; CHECK-NOFP: .cfi_def_cfa_offset 4288
; CHECK-NOFP: stdy %f8, 4120(%r15)
; CHECK-NOFP: stdy %f9, 4112(%r15)
; CHECK-NOFP: stdy %f10, 4104(%r15)
; CHECK-NOFP: stdy %f11, 4096(%r15)
; CHECK-NOFP: std %f12, 4088(%r15)
; CHECK-NOFP: std %f13, 4080(%r15)
; CHECK-NOFP: std %f14, 4072(%r15)
; CHECK-NOFP: std %f15, 4064(%r15)
; CHECK-NOFP: .cfi_offset %f8, -168
; CHECK-NOFP: .cfi_offset %f9, -176
; CHECK-NOFP: .cfi_offset %f10, -184
; CHECK-NOFP: .cfi_offset %f11, -192
; CHECK-NOFP: .cfi_offset %f12, -200
; CHECK-NOFP: .cfi_offset %f13, -208
; CHECK-NOFP: .cfi_offset %f14, -216
; CHECK-NOFP: .cfi_offset %f15, -224
; ...main function body...
; CHECK-NOFP: ldy %f8, 4120(%r15)
; CHECK-NOFP: ldy %f9, 4112(%r15)
; CHECK-NOFP: ldy %f10, 4104(%r15)
; CHECK-NOFP: ldy %f11, 4096(%r15)
; CHECK-NOFP: ld %f12, 4088(%r15)
; CHECK-NOFP: ld %f13, 4080(%r15)
; CHECK-NOFP: ld %f14, 4072(%r15)
; CHECK-NOFP: ld %f15, 4064(%r15)
; CHECK-NOFP: aghi %r15, 4128
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f1:
; CHECK-FP: stmg %r11, %r15, 88(%r15)
; CHECK-FP: aghi %r15, -4128
; CHECK-FP: .cfi_def_cfa_offset 4288
; CHECK-FP: lgr %r11, %r15
; CHECK-FP: .cfi_def_cfa_register %r11
; CHECK-FP: stdy %f8, 4120(%r11)
; CHECK-FP: stdy %f9, 4112(%r11)
; CHECK-FP: stdy %f10, 4104(%r11)
; CHECK-FP: stdy %f11, 4096(%r11)
; CHECK-FP: std %f12, 4088(%r11)
; CHECK-FP: std %f13, 4080(%r11)
; CHECK-FP: std %f14, 4072(%r11)
; CHECK-FP: std %f15, 4064(%r11)
; ...main function body...
; CHECK-FP: ldy %f8, 4120(%r11)
; CHECK-FP: ldy %f9, 4112(%r11)
; CHECK-FP: ldy %f10, 4104(%r11)
; CHECK-FP: ldy %f11, 4096(%r11)
; CHECK-FP: ld %f12, 4088(%r11)
; CHECK-FP: ld %f13, 4080(%r11)
; CHECK-FP: ld %f14, 4072(%r11)
; CHECK-FP: ld %f15, 4064(%r11)
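; The GPRs are reloaded from the slots written by the stmg above: %r11's
; slot is at 88 from the incoming %r15, which is 4128 + 88 = 4216 from the
; frame pointer, hence the displacement on the lmg below.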
; CHECK-FP: lmg %r11, %r15, 4216(%r11)
; CHECK-FP: br %r14
  %y = alloca [486 x i64], align 8
  %elem = getelementptr inbounds [486 x i64]* %y, i64 0, i64 0
  store volatile i64 %x, i64* %elem
  %l0 = load volatile double *%ptr
  %l1 = load volatile double *%ptr
  %l2 = load volatile double *%ptr
  %l3 = load volatile double *%ptr
  %l4 = load volatile double *%ptr
  %l5 = load volatile double *%ptr
  %l6 = load volatile double *%ptr
  %l7 = load volatile double *%ptr
  %l8 = load volatile double *%ptr
  %l9 = load volatile double *%ptr
  %l10 = load volatile double *%ptr
  %l11 = load volatile double *%ptr
  %l12 = load volatile double *%ptr
  %l13 = load volatile double *%ptr
  %l14 = load volatile double *%ptr
  %l15 = load volatile double *%ptr
  %add0 = fadd double %l0, %l0
  %add1 = fadd double %l1, %add0
  %add2 = fadd double %l2, %add1
  %add3 = fadd double %l3, %add2
  %add4 = fadd double %l4, %add3
  %add5 = fadd double %l5, %add4
  %add6 = fadd double %l6, %add5
  %add7 = fadd double %l7, %add6
  %add8 = fadd double %l8, %add7
  %add9 = fadd double %l9, %add8
  %add10 = fadd double %l10, %add9
  %add11 = fadd double %l11, %add10
  %add12 = fadd double %l12, %add11
  %add13 = fadd double %l13, %add12
  %add14 = fadd double %l14, %add13
  %add15 = fadd double %l15, %add14
  store volatile double %add0, double *%ptr
  store volatile double %add1, double *%ptr
  store volatile double %add2, double *%ptr
  store volatile double %add3, double *%ptr
  store volatile double %add4, double *%ptr
  store volatile double %add5, double *%ptr
  store volatile double %add6, double *%ptr
  store volatile double %add7, double *%ptr
  store volatile double %add8, double *%ptr
  store volatile double %add9, double *%ptr
  store volatile double %add10, double *%ptr
  store volatile double %add11, double *%ptr
  store volatile double %add12, double *%ptr
  store volatile double %add13, double *%ptr
  store volatile double %add14, double *%ptr
  store volatile double %add15, double *%ptr
  ret void
}

; Test a frame size that requires some FPRs to be saved and loaded using
; an indexed STD and LD while others can use the 20-bit STDY and LDY.
; The index can be any call-clobbered GPR except %r0.
;
; Don't require the accesses to share the same LLILH; that would be a
; good optimisation but is really a different test.
;
; As above, get a frame of size 524320 by allocating
; (524320 - 176 - 8 * 8) / 8 = 65510 extra doublewords.
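;
; A minimal sketch of the addressing involved: the 20-bit signed
; displacement of STDY/LDY tops out at 524287, so the save slots at
; 524288 and above cannot be reached from the base register alone.
; The code is therefore expected to load 524288 (8 << 16) into a
; scratch GPR with LLILH and use it as an index, while the slots below
; 524288 still fit the 20-bit form.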
define void @f2(double *%ptr, i64 %x) {
; CHECK-NOFP-LABEL: f2:
; CHECK-NOFP: agfi %r15, -524320
; CHECK-NOFP: .cfi_def_cfa_offset 524480
; CHECK-NOFP: llilh [[INDEX:%r[1-5]]], 8
; CHECK-NOFP: std %f8, 24([[INDEX]],%r15)
; CHECK-NOFP: std %f9, 16({{%r[1-5]}},%r15)
; CHECK-NOFP: std %f10, 8({{%r[1-5]}},%r15)
; CHECK-NOFP: std %f11, 0({{%r[1-5]}},%r15)
; CHECK-NOFP: stdy %f12, 524280(%r15)
; CHECK-NOFP: stdy %f13, 524272(%r15)
; CHECK-NOFP: stdy %f14, 524264(%r15)
; CHECK-NOFP: stdy %f15, 524256(%r15)
; CHECK-NOFP: .cfi_offset %f8, -168
; CHECK-NOFP: .cfi_offset %f9, -176
; CHECK-NOFP: .cfi_offset %f10, -184
; CHECK-NOFP: .cfi_offset %f11, -192
; CHECK-NOFP: .cfi_offset %f12, -200
; CHECK-NOFP: .cfi_offset %f13, -208
; CHECK-NOFP: .cfi_offset %f14, -216
; CHECK-NOFP: .cfi_offset %f15, -224
; ...main function body...
; CHECK-NOFP: ld %f8, 24({{%r[1-5]}},%r15)
; CHECK-NOFP: ld %f9, 16({{%r[1-5]}},%r15)
; CHECK-NOFP: ld %f10, 8({{%r[1-5]}},%r15)
; CHECK-NOFP: ld %f11, 0({{%r[1-5]}},%r15)
; CHECK-NOFP: ldy %f12, 524280(%r15)
; CHECK-NOFP: ldy %f13, 524272(%r15)
; CHECK-NOFP: ldy %f14, 524264(%r15)
; CHECK-NOFP: ldy %f15, 524256(%r15)
; CHECK-NOFP: agfi %r15, 524320
; CHECK-NOFP: br %r14
;
; CHECK-FP-LABEL: f2:
; CHECK-FP: stmg %r11, %r15, 88(%r15)
; CHECK-FP: agfi %r15, -524320
; CHECK-FP: .cfi_def_cfa_offset 524480
; CHECK-FP: llilh [[INDEX:%r[1-5]]], 8
; CHECK-FP: std %f8, 24([[INDEX]],%r11)
; CHECK-FP: std %f9, 16({{%r[1-5]}},%r11)
; CHECK-FP: std %f10, 8({{%r[1-5]}},%r11)
; CHECK-FP: std %f11, 0({{%r[1-5]}},%r11)
; CHECK-FP: stdy %f12, 524280(%r11)
; CHECK-FP: stdy %f13, 524272(%r11)
; CHECK-FP: stdy %f14, 524264(%r11)
; CHECK-FP: stdy %f15, 524256(%r11)
; CHECK-FP: .cfi_offset %f8, -168
; CHECK-FP: .cfi_offset %f9, -176
; CHECK-FP: .cfi_offset %f10, -184
; CHECK-FP: .cfi_offset %f11, -192
; CHECK-FP: .cfi_offset %f12, -200
; CHECK-FP: .cfi_offset %f13, -208
; CHECK-FP: .cfi_offset %f14, -216
; CHECK-FP: .cfi_offset %f15, -224
; ...main function body...
; CHECK-FP: ld %f8, 24({{%r[1-5]}},%r11)
; CHECK-FP: ld %f9, 16({{%r[1-5]}},%r11)
; CHECK-FP: ld %f10, 8({{%r[1-5]}},%r11)
; CHECK-FP: ld %f11, 0({{%r[1-5]}},%r11)
; CHECK-FP: ldy %f12, 524280(%r11)
; CHECK-FP: ldy %f13, 524272(%r11)
; CHECK-FP: ldy %f14, 524264(%r11)
; CHECK-FP: ldy %f15, 524256(%r11)
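; Restoring the GPRs needs the same care: %r11's save slot sits at
; 524320 + 88 = 524408 from the frame pointer, which is outside the
; 20-bit range, so the epilogue is expected to step %r11 forward by 128
; first (524280 + 128 = 524408).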
; CHECK-FP: aghi %r11, 128
; CHECK-FP: lmg %r11, %r15, 524280(%r11)
; CHECK-FP: br %r14
  %y = alloca [65510 x i64], align 8
  %elem = getelementptr inbounds [65510 x i64]* %y, i64 0, i64 0
  store volatile i64 %x, i64* %elem
  %l0 = load volatile double *%ptr
  %l1 = load volatile double *%ptr
  %l2 = load volatile double *%ptr
  %l3 = load volatile double *%ptr
  %l4 = load volatile double *%ptr
  %l5 = load volatile double *%ptr
  %l6 = load volatile double *%ptr
  %l7 = load volatile double *%ptr
  %l8 = load volatile double *%ptr
  %l9 = load volatile double *%ptr
  %l10 = load volatile double *%ptr
  %l11 = load volatile double *%ptr
  %l12 = load volatile double *%ptr
  %l13 = load volatile double *%ptr
  %l14 = load volatile double *%ptr
  %l15 = load volatile double *%ptr
  %add0 = fadd double %l0, %l0
  %add1 = fadd double %l1, %add0
  %add2 = fadd double %l2, %add1
  %add3 = fadd double %l3, %add2
  %add4 = fadd double %l4, %add3
  %add5 = fadd double %l5, %add4
  %add6 = fadd double %l6, %add5
  %add7 = fadd double %l7, %add6
  %add8 = fadd double %l8, %add7
  %add9 = fadd double %l9, %add8
  %add10 = fadd double %l10, %add9
  %add11 = fadd double %l11, %add10
  %add12 = fadd double %l12, %add11
  %add13 = fadd double %l13, %add12
  %add14 = fadd double %l14, %add13
  %add15 = fadd double %l15, %add14
  store volatile double %add0, double *%ptr
  store volatile double %add1, double *%ptr
  store volatile double %add2, double *%ptr
  store volatile double %add3, double *%ptr
  store volatile double %add4, double *%ptr
  store volatile double %add5, double *%ptr
  store volatile double %add6, double *%ptr
  store volatile double %add7, double *%ptr
  store volatile double %add8, double *%ptr
  store volatile double %add9, double *%ptr
  store volatile double %add10, double *%ptr
  store volatile double %add11, double *%ptr
  store volatile double %add12, double *%ptr
  store volatile double %add13, double *%ptr
  store volatile double %add14, double *%ptr
  store volatile double %add15, double *%ptr
  ret void
}