; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
; Each function below is run through GVN with load PRE enabled and its output
; is matched against the FileCheck directives embedded in that function.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; Simplest case: %p is stored to on the block3 path but not on the block2
; path, so the load in block4 is redundant along only one incoming edge.
; The expected output has a load of %p placed in block2 and the load in
; block4 replaced by a phi.
define i32 @test1(i32* %p, i1 %C) {
; CHECK-LABEL: @test1(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %p

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32* %p
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32
; CHECK-NEXT: ret i32
}

; This is a simple phi translation case: the loaded pointer is itself a phi
; (%p from block3, %q from block2).  Only %p is stored to, so the expected
; output loads %q in block2 and leaves no load in block4.
define i32 @test2(i32* %p, i32* %q, i1 %C) {
; CHECK-LABEL: @test2(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %q

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %PRE = load i32* %P2
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; This is a PRE case that requires phi translation through a GEP.
; The address loaded in block4 is "phi(%p, %q) + 1".  On the block2 path that
; is %q + 1, which already exists as %B in block1, so the expected output
; loads %B in block2.
define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test3(
block1:
  %B = getelementptr i32* %q, i32 1
  ; Storing %B through %Hack gives the GEP a use besides the expected load.
  store i32* %B, i32** %Hack
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %B

block3:
  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;; Here the address needed on the 'block2' path (%q + 1) is already computed
;; as %B, but that computation lives in 'block3', which does not dominate
;; 'block2'.  The expected output still has a load in 'block2' and none in
;; 'block4'.
define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test4(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32*
; CHECK:   br label %block4

block3:
  %B = getelementptr i32* %q, i32 1
  store i32* %B, i32** %Hack

  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; C equivalent:
;
;   void test5(int N, double *G) {
;     int j;
;     for (j = 0; j < N - 1; j++)
;       G[j] = G[j] + G[j+1];
;   }
;
; The value read from G[j+1] in one iteration is the G[j] operand of the
; next one, so the expected output keeps a single load in the loop and adds
; one load to the preheader (bb.nph) for the first iteration.
define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test5(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double* %G, i64 %tmp6
  %scevgep7 = getelementptr double* %G, i64 %indvar
  %2 = load double* %scevgep7, align 8
  %3 = load double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep7, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; C equivalent:
;
;   void test6(int N, double *G) {
;     int j;
;     for (j = 0; j < N - 1; j++)
;       G[j+1] = G[j] + G[j+1];
;   }
;
; Same loop shape as test5, except the result is stored to G[j+1].  The G[j]
; operand of the next iteration is therefore the value just stored; the
; expected output again has one load in the loop and one in the preheader.
define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test6(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double* %G, i64 %tmp6
  %scevgep7 = getelementptr double* %G, i64 %indvar
  %2 = load double* %scevgep7, align 8
  %3 = load double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; C equivalent:
;
;   void test7(int N, double* G) {
;     long j;
;     G[1] = 1;
;     for (j = 1; j < N - 1; j++)
;       G[j+1] = G[j] + G[j+1];
;   }
;
; This requires phi translation of the adds: the two addresses are formed
; from %indvar + 1 and %indvar + 2.  G[1], the first iteration's G[j], is
; stored in 'entry' before the loop is reached.
define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test7(
entry:
  %0 = getelementptr inbounds double* %G, i64 1
  store double 1.000000e+00, double* %0, align 8
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %3 = load double* %scevgep10, align 8
  %4 = load double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;; Here the loaded address isn't available in 'block2' at all, requiring a new
;; GEP to be inserted into it.  Unlike test3 and test4, this function contains
;; no getelementptr of %q anywhere.
define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK-LABEL: @test8(
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32*
; CHECK:   br label %block4

block3:
  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; C equivalent:
;
;   void test9(int N, double* G) {
;     long j;
;     for (j = 1; j < N - 1; j++)
;       G[j+1] = G[j] + G[j+1];
;   }
;
; This requires phi translation of the adds.  It is the test7 loop without
; the store to G[1] in 'entry', so the expected output has a load in the
; preheader (bb.nph) as well as a single load in the loop.
define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test9(
entry:
  ; Unnamed instruction: it takes the implicit name %0, which keeps the
  ; explicitly numbered %1 and %2 below in sequence.
  add i32 0, 0
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

; CHECK: bb.nph:
; CHECK:   load double*
; CHECK:   br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %3 = load double* %scevgep10, align 8
  %4 = load double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; C equivalent:
;
;   void test10(int N, double* G) {
;     long j;
;     for (j = 1; j < N - 1; j++)
;       G[j] = G[j] + G[j+1] + G[j-1];
;   }
;
; PR5501
; Three elements are read per iteration.  The expected output has two loads
; in the preheader (bb.nph) and only one load left in the loop.
define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK-LABEL: @test10(
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 1
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %0 to i64
  %tmp8 = add i64 %tmp, -1
  br label %bb
; CHECK: bb.nph:
; CHECK:   load double*
; CHECK:   load double*
; CHECK:   br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
  %scevgep = getelementptr double* %G, i64 %indvar
  %tmp9 = add i64 %indvar, 2
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %tmp11 = add i64 %indvar, 1
  %scevgep12 = getelementptr double* %G, i64 %tmp11
  %2 = load double* %scevgep12, align 8
  %3 = load double* %scevgep10, align 8
  %4 = fadd double %2, %3
  %5 = load double* %scevgep, align 8
  %6 = fadd double %4, %5
  store double %6, double* %scevgep12, align 8
  %exitcond = icmp eq i64 %tmp11, %tmp8
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; Test critical edge splitting.
; block2 has two successors (block4, block5) and block4 has two predecessors
; (block2, block3), so the block2 -> block4 edge is critical.  The expected
; output has the load of %p immediately followed by an unconditional branch
; to block4, and a phi at the top of block4.
define i32 @test11(i32* %p, i1 %C, i32 %N) {
; CHECK-LABEL: @test11(
block1:
  br i1 %C, label %block2, label %block3

block2:
  %cond = icmp sgt i32 %N, 1
  br i1 %cond, label %block4, label %block5
; CHECK: load i32* %p
; CHECK-NEXT: br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32* %p
  br label %block5

block5:
  %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
  ret i32 %ret
; CHECK: block4:
; CHECK-NEXT: phi i32
}