neon.ll revision ba92a7085bbb8916334a6571ff33355873883173
1target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
2target triple = "armv7-none-linux-gnueabi"
3
4;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5;;;;;;;;;               INTRINSICS               ;;;;;;;;;;
6;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7
8declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
9declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
10declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
11declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
12declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
13declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
14declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
15declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
16
17declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
18declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
19declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
20declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
21declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
22declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
23declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
24declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
25
26declare <8 x i8>  @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
27declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
28declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
29
30declare <8 x i8>  @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
31declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
32declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
33
34declare <8 x i8>  @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
35declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
36declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
37
38declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
39declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
40
41declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
42declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
43
44declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
45declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
46
47declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
48declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
49
50;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
51;;;;;;;;;                HELPERS                 ;;;;;;;;;;
52;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
53
; Broadcasts the scalar %in into every lane of a <4 x float>.
define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
  ; Seed lane 0, then replicate it into all lanes with an all-zero shuffle mask.
  %lane0 = insertelement <4 x float> undef, float %in, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
61
; Broadcasts the scalar %in into every lane of a <4 x i32>.
define internal <4 x i32> @smear_4i(i32 %in) nounwind readnone alwaysinline {
  ; Seed lane 0, then replicate it into all lanes with an all-zero shuffle mask.
  %lane0 = insertelement <4 x i32> undef, i32 %in, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
69
; Broadcasts the scalar %in into every lane of a <4 x i16>.
define internal <4 x i16> @smear_4s(i16 %in) nounwind readnone alwaysinline {
  ; Seed lane 0, then replicate it into all lanes with an all-zero shuffle mask.
  %lane0 = insertelement <4 x i16> undef, i16 %in, i32 0
  %splat = shufflevector <4 x i16> %lane0, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}
77
78
79
; Broadcasts the scalar %in into both lanes of a <2 x float>.
define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
  ; Seed lane 0, then replicate it with an all-zero shuffle mask.
  %lane0 = insertelement <2 x float> undef, float %in, i32 0
  %splat = shufflevector <2 x float> %lane0, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}
85
; Broadcasts the scalar %in into both lanes of a <2 x i32>.
define internal <2 x i32> @smear_2i(i32 %in) nounwind readnone alwaysinline {
  ; Seed lane 0, then replicate it with an all-zero shuffle mask.
  %lane0 = insertelement <2 x i32> undef, i32 %in, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %splat
}
91
; Broadcasts the scalar %in into both lanes of a <2 x i16>.
define internal <2 x i16> @smear_2s(i16 %in) nounwind readnone alwaysinline {
  ; Seed lane 0, then replicate it with an all-zero shuffle mask.
  %lane0 = insertelement <2 x i16> undef, i16 %in, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <2 x i32> zeroinitializer
  ret <2 x i16> %splat
}
97
98
; Broadcasts the scalar %in into every lane of a <4 x i32>.
; Computes the same value as @smear_4i; kept as a separate symbol.
define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
  ; Seed lane 0, then replicate it into all lanes with an all-zero shuffle mask.
  %lane0 = insertelement <4 x i32> undef, i32 %in, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
106
107
108;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
109;;;;;;;;;                 CLAMP                  ;;;;;;;;;;
110;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
111
; clamp for <4 x float> with per-lane vector bounds: max(min(value, high), low).
; Declared readnone rather than readonly: the body only combines its SSA
; arguments through readnone NEON intrinsics and never accesses memory.
define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readnone {
  ; Apply the upper bound first, then the lower bound.
  %capped = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
  %clamped = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %capped, <4 x float> %low) nounwind readnone
  ret <4 x float> %clamped
}
117
; clamp for <4 x float> with scalar bounds: max(min(value, high), low) per lane.
; The scalar bounds are splatted and the NEON min/max are applied directly,
; matching the 2- and 3-lane scalar-bound overloads, instead of going through
; a readonly call to the vector-bound overload.
; Declared readnone rather than readonly: no memory is accessed.
define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readnone {
  %high4 = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %low4 = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  ; Apply the upper bound first, then the lower bound.
  %capped = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high4) nounwind readnone
  %clamped = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %capped, <4 x float> %low4) nounwind readnone
  ret <4 x float> %clamped
}
124
; clamp for <3 x float> with per-lane vector bounds: max(min(value, high), low).
; Operands are widened to 4 lanes (mask index 3 selects from the undef operand)
; so the v4f32 NEON min/max can be used; only lanes 0-2 are returned.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readnone {
  %value4 = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %low4 = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %high4 = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %capped = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value4, <4 x float> %high4) nounwind readnone
  %clamped = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %capped, <4 x float> %low4) nounwind readnone
  ; Drop the padding lane again.
  %result = shufflevector <4 x float> %clamped, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %result
}
134
; clamp for <3 x float> with scalar bounds: max(min(value, high), low) per lane.
; %value is widened to 4 lanes (mask index 3 selects from the undef operand) and
; the bounds are splatted so the v4f32 NEON min/max can be used.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readnone {
  %value4 = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %high4 = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %low4 = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %capped = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value4, <4 x float> %high4) nounwind readnone
  %clamped = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %capped, <4 x float> %low4) nounwind readnone
  ; Drop the padding lane again.
  %result = shufflevector <4 x float> %clamped, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %result
}
144
; clamp for <2 x float> with per-lane vector bounds: max(min(value, high), low).
; Declared readnone rather than readonly: the body only combines its SSA
; arguments through readnone NEON intrinsics and never accesses memory.
define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readnone {
  %capped = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
  %clamped = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %capped, <2 x float> %low) nounwind readnone
  ret <2 x float> %clamped
}
150
; clamp for <2 x float> with scalar bounds: max(min(value, high), low) per lane.
; The scalar bounds are splatted to 2 lanes before the NEON min/max.
; Declared readnone rather than readonly: no memory is accessed.
define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readnone {
  %high2 = tail call <2 x float> @smear_2f(float %high) nounwind readnone
  %low2 = tail call <2 x float> @smear_2f(float %low) nounwind readnone
  %capped = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high2) nounwind readnone
  %clamped = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %capped, <2 x float> %low2) nounwind readnone
  ret <2 x float> %clamped
}
158
; Scalar float clamp: first limit %value to %high, then raise the result to %low.
; Both comparisons are ordered, so a NaN on either side of a compare makes the
; select pick the bound operand (%high, then %low).
; Declared readnone rather than readonly: only compares and selects are used.
define float @_Z5clampfff(float %value, float %low, float %high) nounwind readnone {
  %below_high = fcmp olt float %value, %high
  %capped = select i1 %below_high, float %value, float %high
  %above_low = fcmp ogt float %capped, %low
  %clamped = select i1 %above_low, float %capped, float %low
  ret float %clamped
}
166
167
168
; Signed clamp for <4 x i32> with per-lane vector bounds: max(min(value, high), low).
; Declared readnone rather than readonly: the body only combines its SSA
; arguments through readnone NEON intrinsics and never accesses memory.
define <4 x i32> @_Z5clampDv4_iS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind readnone {
  %capped = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind readnone
  %clamped = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %capped, <4 x i32> %low) nounwind readnone
  ret <4 x i32> %clamped
}
174
; Signed clamp for <4 x i32> with scalar bounds: max(min(value, high), low) per lane.
; The scalar bounds are splatted to 4 lanes before the NEON min/max.
; Declared readnone rather than readonly: no memory is accessed.
define <4 x i32> @_Z5clampDv4_iii(<4 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %high4 = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %low4 = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %capped = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %high4) nounwind readnone
  %clamped = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %capped, <4 x i32> %low4) nounwind readnone
  ret <4 x i32> %clamped
}
182
; Signed clamp for <3 x i32> with per-lane vector bounds: max(min(value, high), low).
; Operands are widened to 4 lanes (mask index 3 selects from the undef operand)
; so the v4i32 NEON min/max can be used; only lanes 0-2 are returned.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x i32> @_Z5clampDv3_iS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind readnone {
  %value4 = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %low4 = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %high4 = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %capped = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value4, <4 x i32> %high4) nounwind readnone
  %clamped = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %capped, <4 x i32> %low4) nounwind readnone
  ; Drop the padding lane again.
  %result = shufflevector <4 x i32> %clamped, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %result
}
192
; Signed clamp for <3 x i32> with scalar bounds: max(min(value, high), low) per lane.
; %value is widened to 4 lanes (mask index 3 selects from the undef operand) and
; the bounds are splatted so the v4i32 NEON min/max can be used.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x i32> @_Z5clampDv3_iii(<3 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %value4 = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %high4 = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %low4 = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %capped = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value4, <4 x i32> %high4) nounwind readnone
  %clamped = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %capped, <4 x i32> %low4) nounwind readnone
  ; Drop the padding lane again.
  %result = shufflevector <4 x i32> %clamped, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %result
}
202
; Signed clamp for <2 x i32> with per-lane vector bounds: max(min(value, high), low).
; Declared readnone rather than readonly: the body only combines its SSA
; arguments through readnone NEON intrinsics and never accesses memory.
define <2 x i32> @_Z5clampDv2_iS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind readnone {
  %capped = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind readnone
  %clamped = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %capped, <2 x i32> %low) nounwind readnone
  ret <2 x i32> %clamped
}
208
; Signed clamp for <2 x i32> with scalar bounds: max(min(value, high), low) per lane.
; The scalar bounds are splatted to 2 lanes before the NEON min/max.
; Declared readnone rather than readonly: no memory is accessed.
define <2 x i32> @_Z5clampDv2_iii(<2 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %high2 = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
  %low2 = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
  %capped = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %high2) nounwind readnone
  %clamped = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %capped, <2 x i32> %low2) nounwind readnone
  ret <2 x i32> %clamped
}
216
217
218
; Unsigned clamp for <4 x i32> with per-lane vector bounds: max(min(value, high), low).
; Declared readnone rather than readonly: the body only combines its SSA
; arguments through readnone NEON intrinsics and never accesses memory.
define <4 x i32> @_Z5clampDv4_jS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind readnone {
  %capped = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind readnone
  %clamped = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %capped, <4 x i32> %low) nounwind readnone
  ret <4 x i32> %clamped
}
224
; Unsigned clamp for <4 x i32> with scalar bounds: max(min(value, high), low) per lane.
; The scalar bounds are splatted to 4 lanes before the NEON min/max.
; Declared readnone rather than readonly: no memory is accessed.
define <4 x i32> @_Z5clampDv4_jjj(<4 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %high4 = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %low4 = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %capped = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %high4) nounwind readnone
  %clamped = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %capped, <4 x i32> %low4) nounwind readnone
  ret <4 x i32> %clamped
}
232
; Unsigned clamp for <3 x i32> with per-lane vector bounds: max(min(value, high), low).
; Operands are widened to 4 lanes (mask index 3 selects from the undef operand)
; so the v4i32 NEON min/max can be used; only lanes 0-2 are returned.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x i32> @_Z5clampDv3_jS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind readnone {
  %value4 = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %low4 = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %high4 = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %capped = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value4, <4 x i32> %high4) nounwind readnone
  %clamped = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %capped, <4 x i32> %low4) nounwind readnone
  ; Drop the padding lane again.
  %result = shufflevector <4 x i32> %clamped, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %result
}
242
; Unsigned clamp for <3 x i32> with scalar bounds: max(min(value, high), low) per lane.
; %value is widened to 4 lanes (mask index 3 selects from the undef operand) and
; the bounds are splatted so the v4i32 NEON min/max can be used.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x i32> @_Z5clampDv3_jjj(<3 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %value4 = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %high4 = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %low4 = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %capped = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value4, <4 x i32> %high4) nounwind readnone
  %clamped = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %capped, <4 x i32> %low4) nounwind readnone
  ; Drop the padding lane again.
  %result = shufflevector <4 x i32> %clamped, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %result
}
252
; Unsigned clamp for <2 x i32> with per-lane vector bounds: max(min(value, high), low).
; Declared readnone rather than readonly: the body only combines its SSA
; arguments through readnone NEON intrinsics and never accesses memory.
define <2 x i32> @_Z5clampDv2_jS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind readnone {
  %capped = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind readnone
  %clamped = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %capped, <2 x i32> %low) nounwind readnone
  ret <2 x i32> %clamped
}
258
; Unsigned clamp for <2 x i32> with scalar bounds: max(min(value, high), low) per lane.
; The scalar bounds are splatted to 2 lanes before the NEON min/max.
; Declared readnone rather than readonly: no memory is accessed.
define <2 x i32> @_Z5clampDv2_jjj(<2 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %high2 = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
  %low2 = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
  %capped = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %high2) nounwind readnone
  %clamped = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %capped, <2 x i32> %low2) nounwind readnone
  ret <2 x i32> %clamped
}
266
267
268;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
269;;;;;;;;;                  FMAX                  ;;;;;;;;;;
270;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
271
; fmax for two <4 x float> vectors: per-lane NEON maximum.
; Declared readnone rather than readonly: the body is a single readnone
; intrinsic call, and the readnone max wrapper for this type calls this function.
define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %lane_max = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %lane_max
}
276
; fmax of a <4 x float> against a scalar: the scalar is splatted to 4 lanes and
; the per-lane NEON maximum is taken.
; Declared readnone rather than readonly: no memory is accessed.
define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %v2_splat = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %lane_max = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2_splat) nounwind readnone
  ret <4 x float> %lane_max
}
282
; fmax for two <3 x float> vectors.
; Both operands are widened to 4 lanes (mask index 3 selects from the undef
; operand) so the v4f32 NEON maximum can be used; only lanes 0-2 are returned.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %a4 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a4, <4 x float> %b4) nounwind readnone
  %max3 = shufflevector <4 x float> %max4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %max3
}
290
; fmax of a <3 x float> against a scalar.
; %v1 is widened to 4 lanes (mask index 3 selects from the undef operand) and
; %v2 is splatted so the v4f32 NEON maximum can be used; lanes 0-2 are returned.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %a4 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %max4 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a4, <4 x float> %b4) nounwind readnone
  %max3 = shufflevector <4 x float> %max4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %max3
}
298
; fmax for two <2 x float> vectors: per-lane NEON maximum.
; Declared readnone rather than readonly: the body is a single readnone
; intrinsic call, and the readnone max wrapper for this type calls this function.
define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %lane_max = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %lane_max
}
303
; fmax of a <2 x float> against a scalar: the scalar is splatted to 2 lanes and
; the per-lane NEON maximum is taken.
; Declared readnone rather than readonly: no memory is accessed.
define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %v2_splat = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %lane_max = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2_splat) nounwind readnone
  ret <2 x float> %lane_max
}
309
; Scalar float fmax: returns %v1 when it compares ordered-greater than %v2,
; otherwise %v2 (so %v2 is also returned when either operand is NaN).
; Declared readnone rather than readonly, matching the scalar fmin in this
; file: only a compare and a select are used.
define float @_Z4fmaxff(float %v1, float %v2) nounwind readnone {
  %v1_greater = fcmp ogt float %v1, %v2
  %larger = select i1 %v1_greater, float %v1, float %v2
  ret float %larger
}
315
316
317;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
318;;;;;;;;;                  FMIN                  ;;;;;;;;;;
319;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
320
; fmin for two <4 x float> vectors: per-lane NEON minimum.
; Declared readnone rather than readonly, matching the scalar fmin in this
; file: the body is a single readnone intrinsic call.
define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %lane_min = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %lane_min
}
325
; fmin of a <4 x float> against a scalar: the scalar is splatted to 4 lanes and
; the per-lane NEON minimum is taken.
; Declared readnone rather than readonly: no memory is accessed.
define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %v2_splat = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %lane_min = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2_splat) nounwind readnone
  ret <4 x float> %lane_min
}
331
; fmin for two <3 x float> vectors.
; Both operands are widened to 4 lanes (mask index 3 selects from the undef
; operand) so the v4f32 NEON minimum can be used; only lanes 0-2 are returned.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %a4 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %min4 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a4, <4 x float> %b4) nounwind readnone
  %min3 = shufflevector <4 x float> %min4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %min3
}
339
; fmin of a <3 x float> against a scalar.
; %v1 is widened to 4 lanes (mask index 3 selects from the undef operand) and
; %v2 is splatted so the v4f32 NEON minimum can be used; lanes 0-2 are returned.
; Declared readnone rather than readonly: no memory is accessed.
define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %a4 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b4 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %min4 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a4, <4 x float> %b4) nounwind readnone
  %min3 = shufflevector <4 x float> %min4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %min3
}
347
; fmin for two <2 x float> vectors: per-lane NEON minimum.
; Declared readnone rather than readonly, matching the scalar fmin in this
; file: the body is a single readnone intrinsic call.
define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %lane_min = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %lane_min
}
352
; fmin of a <2 x float> against a scalar: the scalar is splatted to 2 lanes and
; the per-lane NEON minimum is taken.
; Declared readnone rather than readonly: no memory is accessed.
define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %v2_splat = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %lane_min = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2_splat) nounwind readnone
  ret <2 x float> %lane_min
}
358
; Scalar float fmin: yields %v1 when it compares ordered-less than %v2,
; otherwise %v2 (so %v2 is also returned when either operand is NaN).
define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
  ; "v2 ogt v1" is the same predicate as "v1 olt v2" with the operands swapped.
  %v1_smaller = fcmp ogt float %v2, %v1
  %smaller = select i1 %v1_smaller, float %v1, float %v2
  ret float %smaller
}
364
365
366;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
367;;;;;;;;;                  MAX                   ;;;;;;;;;;
368;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
369
; Signed 8-bit scalar max: yields %v1 when it is strictly greater, else %v2.
define signext i8 @_Z3maxcc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  ; "v2 slt v1" is the same predicate as "v1 sgt v2" with the operands swapped.
  %v1_greater = icmp slt i8 %v2, %v1
  %larger = select i1 %v1_greater, i8 %v1, i8 %v2
  ret i8 %larger
}
375
; Signed max for <2 x i8>, per lane.
; Lanes are sign-extended to i32 so the v2i32 NEON maximum can be used, then
; truncated back to i8.
define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %wide1 = sext <2 x i8> %v1 to <2 x i32>
  %wide2 = sext <2 x i8> %v2 to <2 x i32>
  %wide_max = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %wide1, <2 x i32> %wide2) nounwind readnone
  %narrow_max = trunc <2 x i32> %wide_max to <2 x i8>
  ret <2 x i8> %narrow_max
}
383
; Signed max for <3 x i8>, per lane.
; Lanes are sign-extended to i32 and padded to 4 lanes (mask index 3 selects
; from the undef operand) for the v4i32 NEON maximum; the padding lane is then
; dropped and the result truncated back to i8.
define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %wide1 = sext <3 x i8> %v1 to <3 x i32>
  %wide2 = sext <3 x i8> %v2 to <3 x i32>
  %pad1 = shufflevector <3 x i32> %wide1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pad2 = shufflevector <3 x i32> %wide2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %pad1, <4 x i32> %pad2) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %narrow_max = trunc <3 x i32> %max3 to <3 x i8>
  ret <3 x i8> %narrow_max
}
394
; Signed max for <4 x i8>, per lane.
; Lanes are sign-extended to i32 so the v4i32 NEON maximum can be used, then
; truncated back to i8.
define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %wide1 = sext <4 x i8> %v1 to <4 x i32>
  %wide2 = sext <4 x i8> %v2 to <4 x i32>
  %wide_max = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %wide1, <4 x i32> %wide2) nounwind readnone
  %narrow_max = trunc <4 x i32> %wide_max to <4 x i8>
  ret <4 x i8> %narrow_max
}
402
; Signed 16-bit scalar max: yields %v1 when it is strictly greater, else %v2.
define signext i16 @_Z3maxss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  ; "v2 slt v1" is the same predicate as "v1 sgt v2" with the operands swapped.
  %v1_greater = icmp slt i16 %v2, %v1
  %larger = select i1 %v1_greater, i16 %v1, i16 %v2
  ret i16 %larger
}
408
; Signed max for <2 x i16>, per lane.
; Lanes are sign-extended to i32 so the v2i32 NEON maximum can be used, then
; truncated back to i16.
define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %wide1 = sext <2 x i16> %v1 to <2 x i32>
  %wide2 = sext <2 x i16> %v2 to <2 x i32>
  %wide_max = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %wide1, <2 x i32> %wide2) nounwind readnone
  %narrow_max = trunc <2 x i32> %wide_max to <2 x i16>
  ret <2 x i16> %narrow_max
}
416
; Signed max for <3 x i16>, per lane.
; Lanes are sign-extended to i32 and padded to 4 lanes (mask index 3 selects
; from the undef operand) for the v4i32 NEON maximum; the padding lane is then
; dropped and the result truncated back to i16.
define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %wide1 = sext <3 x i16> %v1 to <3 x i32>
  %wide2 = sext <3 x i16> %v2 to <3 x i32>
  %pad1 = shufflevector <3 x i32> %wide1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pad2 = shufflevector <3 x i32> %wide2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %pad1, <4 x i32> %pad2) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %narrow_max = trunc <3 x i32> %max3 to <3 x i16>
  ret <3 x i16> %narrow_max
}
427
; Signed max for <4 x i16>, per lane.
; Uses the v4i16 NEON maximum directly instead of sign-extending to i32,
; taking the v4i32 maximum and truncating: the signed maximum of two i16 lanes
; is always one of those lanes, so the result is identical without the
; widening round trip.
define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %lane_max = tail call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone
  ret <4 x i16> %lane_max
}
435
; Signed 32-bit scalar max: yields %v1 when it is strictly greater, else %v2.
define i32 @_Z3maxii(i32 %v1, i32 %v2) nounwind readnone {
  ; "v2 slt v1" is the same predicate as "v1 sgt v2" with the operands swapped.
  %v1_greater = icmp slt i32 %v2, %v1
  %larger = select i1 %v1_greater, i32 %v1, i32 %v2
  ret i32 %larger
}
441
; Signed max for <2 x i32>: a direct per-lane NEON maximum.
define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %lane_max = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %lane_max
}
446
; Signed max for <3 x i32>, per lane.
; Operands are padded to 4 lanes (mask index 3 selects from the undef operand)
; for the v4i32 NEON maximum; only lanes 0-2 are returned.
define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %pad1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pad2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %pad1, <4 x i32> %pad2) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %max3
}
454
; Signed max for <4 x i32>: a direct per-lane NEON maximum.
define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %lane_max = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %lane_max
}
459
; Signed 64-bit scalar max: yields %v1 when it is strictly greater, else %v2.
define i64 @_Z3maxxx(i64 %v1, i64 %v2) nounwind readnone {
  ; "v2 slt v1" is the same predicate as "v1 sgt v2" with the operands swapped.
  %v1_greater = icmp slt i64 %v2, %v1
  %larger = select i1 %v1_greater, i64 %v1, i64 %v2
  ret i64 %larger
}
465
466; TODO:  long vector types
467
; Unsigned 8-bit scalar max: yields %v1 when it is strictly greater, else %v2.
define zeroext i8 @_Z3maxhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  ; "v2 ult v1" is the same predicate as "v1 ugt v2" with the operands swapped.
  %v1_greater = icmp ult i8 %v2, %v1
  %larger = select i1 %v1_greater, i8 %v1, i8 %v2
  ret i8 %larger
}
473
; Unsigned max for <2 x i8>, per lane.
; Lanes are zero-extended to i32 so the v2i32 unsigned NEON maximum can be
; used, then truncated back to i8.
define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %wide1 = zext <2 x i8> %v1 to <2 x i32>
  %wide2 = zext <2 x i8> %v2 to <2 x i32>
  %wide_max = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %wide1, <2 x i32> %wide2) nounwind readnone
  %narrow_max = trunc <2 x i32> %wide_max to <2 x i8>
  ret <2 x i8> %narrow_max
}
481
; Unsigned max for <3 x i8>, per lane.
; Lanes are zero-extended to i32 and padded to 4 lanes (mask index 3 selects
; from the undef operand) for the v4i32 unsigned NEON maximum; the padding lane
; is then dropped and the result truncated back to i8.
define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %wide1 = zext <3 x i8> %v1 to <3 x i32>
  %wide2 = zext <3 x i8> %v2 to <3 x i32>
  %pad1 = shufflevector <3 x i32> %wide1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pad2 = shufflevector <3 x i32> %wide2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %pad1, <4 x i32> %pad2) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %narrow_max = trunc <3 x i32> %max3 to <3 x i8>
  ret <3 x i8> %narrow_max
}
492
; Unsigned max for <4 x i8>, per lane.
; Lanes are zero-extended to i32 so the v4i32 unsigned NEON maximum can be
; used, then truncated back to i8.
define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %wide1 = zext <4 x i8> %v1 to <4 x i32>
  %wide2 = zext <4 x i8> %v2 to <4 x i32>
  %wide_max = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %wide1, <4 x i32> %wide2) nounwind readnone
  %narrow_max = trunc <4 x i32> %wide_max to <4 x i8>
  ret <4 x i8> %narrow_max
}
500
; Unsigned 16-bit scalar max: yields %v1 when it is strictly greater, else %v2.
define zeroext i16 @_Z3maxtt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  ; "v2 ult v1" is the same predicate as "v1 ugt v2" with the operands swapped.
  %v1_greater = icmp ult i16 %v2, %v1
  %larger = select i1 %v1_greater, i16 %v1, i16 %v2
  ret i16 %larger
}
506
; Unsigned max for <2 x i16>, per lane.
; Lanes are zero-extended to i32 so the v2i32 unsigned NEON maximum can be
; used, then truncated back to i16.
define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %wide1 = zext <2 x i16> %v1 to <2 x i32>
  %wide2 = zext <2 x i16> %v2 to <2 x i32>
  %wide_max = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %wide1, <2 x i32> %wide2) nounwind readnone
  %narrow_max = trunc <2 x i32> %wide_max to <2 x i16>
  ret <2 x i16> %narrow_max
}
514
; Unsigned max for <3 x i16>, per lane.
; Lanes are zero-extended to i32 and padded to 4 lanes (mask index 3 selects
; from the undef operand) for the v4i32 unsigned NEON maximum; the padding lane
; is then dropped and the result truncated back to i16.
define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %wide1 = zext <3 x i16> %v1 to <3 x i32>
  %wide2 = zext <3 x i16> %v2 to <3 x i32>
  %pad1 = shufflevector <3 x i32> %wide1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pad2 = shufflevector <3 x i32> %wide2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %pad1, <4 x i32> %pad2) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %narrow_max = trunc <3 x i32> %max3 to <3 x i16>
  ret <3 x i16> %narrow_max
}
525
; Unsigned max for <4 x i16>, per lane.
; Uses the v4i16 unsigned NEON maximum directly instead of zero-extending to
; i32, taking the v4i32 maximum and truncating: the unsigned maximum of two i16
; lanes is always one of those lanes, so the result is identical without the
; widening round trip.
define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %lane_max = tail call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone
  ret <4 x i16> %lane_max
}
533
; Unsigned 32-bit scalar max: yields %v1 when it is strictly greater, else %v2.
define i32 @_Z3maxjj(i32 %v1, i32 %v2) nounwind readnone {
  ; "v2 ult v1" is the same predicate as "v1 ugt v2" with the operands swapped.
  %v1_greater = icmp ult i32 %v2, %v1
  %larger = select i1 %v1_greater, i32 %v1, i32 %v2
  ret i32 %larger
}
539
; Unsigned max for <2 x i32>: a direct per-lane NEON maximum.
define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %lane_max = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %lane_max
}
544
; Unsigned max for <3 x i32>, per lane.
; Operands are padded to 4 lanes (mask index 3 selects from the undef operand)
; for the v4i32 unsigned NEON maximum; only lanes 0-2 are returned.
define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %pad1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pad2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %max4 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %pad1, <4 x i32> %pad2) nounwind readnone
  %max3 = shufflevector <4 x i32> %max4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %max3
}
552
; Unsigned max for <4 x i32>: a direct per-lane NEON maximum.
define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %lane_max = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %lane_max
}
557
; Unsigned 64-bit scalar max: yields %v1 when it is strictly greater, else %v2.
define i64 @_Z3maxyy(i64 %v1, i64 %v2) nounwind readnone {
  ; "v2 ult v1" is the same predicate as "v1 ugt v2" with the operands swapped.
  %v1_greater = icmp ult i64 %v2, %v1
  %larger = select i1 %v1_greater, i64 %v1, i64 %v2
  ret i64 %larger
}
563
564; TODO:  long vector types
565
; Scalar float max: forwards both operands unchanged to the scalar fmax.
define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
  %larger = tail call float @_Z4fmaxff(float %v1, float %v2)
  ret float %larger
}
570
; max for two <2 x float> vectors: forwards to the matching fmax overload.
define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %lane_max = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
  ret <2 x float> %lane_max
}
575
; max of a <2 x float> against a scalar: forwards to the matching fmax overload.
define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %lane_max = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
  ret <2 x float> %lane_max
}
580
; max for two <3 x float> vectors: forwards to the matching fmax overload.
define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %lane_max = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
  ret <3 x float> %lane_max
}
585
; max of a <3 x float> against a scalar: forwards to the matching fmax overload.
define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %lane_max = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
  ret <3 x float> %lane_max
}
590
; max for two <4 x float> vectors: forwards to the matching fmax overload.
define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %lane_max = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
  ret <4 x float> %lane_max
}
595
; max of a <4 x float> against a scalar: forwards to the matching fmax overload.
define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %lane_max = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
  ret <4 x float> %lane_max
}
600
601
602;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
603;;;;;;;;;                  MIN                   ;;;;;;;;;;
604;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
605
; Signed 8-bit scalar min: yields %v1 when it is strictly smaller, else %v2.
define signext i8 @_Z3mincc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  ; "v2 sgt v1" is the same predicate as "v1 slt v2" with the operands swapped.
  %v1_smaller = icmp sgt i8 %v2, %v1
  %smaller = select i1 %v1_smaller, i8 %v1, i8 %v2
  ret i8 %smaller
}
611
; Signed min for <2 x i8>, per lane.
; Lanes are sign-extended to i32 so the v2i32 NEON minimum can be used, then
; truncated back to i8.
define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %wide1 = sext <2 x i8> %v1 to <2 x i32>
  %wide2 = sext <2 x i8> %v2 to <2 x i32>
  %wide_min = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %wide1, <2 x i32> %wide2) nounwind readnone
  %narrow_min = trunc <2 x i32> %wide_min to <2 x i8>
  ret <2 x i8> %narrow_min
}
619
; Signed min for <3 x i8>, per lane.
; Lanes are sign-extended to i32 and padded to 4 lanes (mask index 3 selects
; from the undef operand) for the v4i32 NEON minimum; the padding lane is then
; dropped and the result truncated back to i8.
define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %wide1 = sext <3 x i8> %v1 to <3 x i32>
  %wide2 = sext <3 x i8> %v2 to <3 x i32>
  %pad1 = shufflevector <3 x i32> %wide1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %pad2 = shufflevector <3 x i32> %wide2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %min4 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %pad1, <4 x i32> %pad2) nounwind readnone
  %min3 = shufflevector <4 x i32> %min4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %narrow_min = trunc <3 x i32> %min3 to <3 x i8>
  ret <3 x i8> %narrow_min
}
630
; Signed min for <4 x i8>, per lane.
; Lanes are sign-extended to i32 so the v4i32 NEON minimum can be used, then
; truncated back to i8.
define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %wide1 = sext <4 x i8> %v1 to <4 x i32>
  %wide2 = sext <4 x i8> %v2 to <4 x i32>
  %wide_min = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %wide1, <4 x i32> %wide2) nounwind readnone
  %narrow_min = trunc <4 x i32> %wide_min to <4 x i8>
  ret <4 x i8> %narrow_min
}
638
; min(short, short): signed 16-bit minimum.
define signext i16 @_Z3minss(i16 signext %v1, i16 signext %v2) nounwind readnone {
entry:
  ; "%v2 > %v1" is the same predicate as "%v1 < %v2"; on a tie %v2 is returned.
  %lhs_is_less = icmp sgt i16 %v2, %v1
  %smallest = select i1 %lhs_is_less, i16 %v1, i16 %v2
  ret i16 %smallest
}
644
; min(short2, short2): sign-extend each lane to i32, take the signed NEON
; minimum on the widened lanes, then truncate back to i16.
define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
entry:
  %a.wide = sext <2 x i16> %v1 to <2 x i32>
  %b.wide = sext <2 x i16> %v2 to <2 x i32>
  %min.wide = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a.wide, <2 x i32> %b.wide) nounwind readnone
  %min = trunc <2 x i32> %min.wide to <2 x i16>
  ret <2 x i16> %min
}
652
; min(short3, short3): sign-extend to i32, pad to four lanes so the 128-bit
; signed NEON minimum can be used, then drop the padding lane and truncate.
define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
entry:
  %a.wide = sext <3 x i16> %v1 to <3 x i32>
  %b.wide = sext <3 x i16> %v2 to <3 x i32>
  ; Lane 3 of the padded operands is undefined; it is never read back.
  %a.pad = shufflevector <3 x i32> %a.wide, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %b.pad = shufflevector <3 x i32> %b.wide, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min.pad = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a.pad, <4 x i32> %b.pad) nounwind readnone
  %min.wide = shufflevector <4 x i32> %min.pad, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min = trunc <3 x i32> %min.wide to <3 x i16>
  ret <3 x i16> %min
}
663
; min(short4, short4): lane-wise signed 16-bit minimum.
;
; <4 x i16> is a native 64-bit NEON vector and the file already declares the
; matching 16-bit signed minimum intrinsic, so the operands are passed to it
; directly. The result is the same as widening both operands to i32, taking
; the 32-bit minimum and truncating back, without the two conversions.
define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
entry:
  %min = tail call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone
  ret <4 x i16> %min
}
671
; min(int, int): signed 32-bit minimum.
define i32 @_Z3minii(i32 %v1, i32 %v2) nounwind readnone {
entry:
  ; "%v2 > %v1" is the same predicate as "%v1 < %v2"; on a tie %v2 is returned.
  %lhs_is_less = icmp sgt i32 %v2, %v1
  %smallest = select i1 %lhs_is_less, i32 %v1, i32 %v2
  ret i32 %smallest
}
677
; min(int2, int2): lane-wise signed minimum via the 64-bit NEON intrinsic.
define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
entry:
  %min = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %min
}
682
; min(int3, int3): pad both operands to four lanes, take the signed 128-bit
; NEON minimum, and return the first three lanes.
define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
entry:
  ; Lane 3 of the padded operands is undefined; it is never read back.
  %a.pad = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %b.pad = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min.pad = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a.pad, <4 x i32> %b.pad) nounwind readnone
  %min = shufflevector <4 x i32> %min.pad, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %min
}
690
; min(int4, int4): lane-wise signed minimum via the 128-bit NEON intrinsic.
define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
entry:
  %min = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %min
}
695
; min(long long, long long): signed 64-bit minimum.
define i64 @_Z3minxx(i64 %v1, i64 %v2) nounwind readnone {
entry:
  ; "%v2 > %v1" is the same predicate as "%v1 < %v2"; on a tie %v2 is returned.
  %lhs_is_less = icmp sgt i64 %v2, %v1
  %smallest = select i1 %lhs_is_less, i64 %v1, i64 %v2
  ret i64 %smallest
}
701
702; TODO:  long vector types
703
; min(uchar, uchar): unsigned 8-bit minimum.
define zeroext i8 @_Z3minhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
entry:
  ; "%v2 > %v1" is the same predicate as "%v1 < %v2"; on a tie %v2 is returned.
  %lhs_is_less = icmp ugt i8 %v2, %v1
  %smallest = select i1 %lhs_is_less, i8 %v1, i8 %v2
  ret i8 %smallest
}
709
; min(uchar2, uchar2): zero-extend each lane to i32, take the unsigned NEON
; minimum on the widened lanes, then truncate back to i8.
define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
entry:
  %a.wide = zext <2 x i8> %v1 to <2 x i32>
  %b.wide = zext <2 x i8> %v2 to <2 x i32>
  %min.wide = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a.wide, <2 x i32> %b.wide) nounwind readnone
  %min = trunc <2 x i32> %min.wide to <2 x i8>
  ret <2 x i8> %min
}
717
; min(uchar3, uchar3): zero-extend to i32, pad to four lanes so the 128-bit
; unsigned NEON minimum can be used, then drop the padding lane and truncate.
define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
entry:
  %a.wide = zext <3 x i8> %v1 to <3 x i32>
  %b.wide = zext <3 x i8> %v2 to <3 x i32>
  ; Lane 3 of the padded operands is undefined; it is never read back.
  %a.pad = shufflevector <3 x i32> %a.wide, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %b.pad = shufflevector <3 x i32> %b.wide, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min.pad = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a.pad, <4 x i32> %b.pad) nounwind readnone
  %min.wide = shufflevector <4 x i32> %min.pad, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min = trunc <3 x i32> %min.wide to <3 x i8>
  ret <3 x i8> %min
}
728
; min(uchar4, uchar4): zero-extend each lane to i32, take the unsigned NEON
; minimum on the widened lanes, then truncate back to i8.
define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
entry:
  %a.wide = zext <4 x i8> %v1 to <4 x i32>
  %b.wide = zext <4 x i8> %v2 to <4 x i32>
  %min.wide = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a.wide, <4 x i32> %b.wide) nounwind readnone
  %min = trunc <4 x i32> %min.wide to <4 x i8>
  ret <4 x i8> %min
}
736
; min(ushort, ushort): unsigned 16-bit minimum.
define zeroext i16 @_Z3mintt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
entry:
  ; "%v2 > %v1" is the same predicate as "%v1 < %v2"; on a tie %v2 is returned.
  %lhs_is_less = icmp ugt i16 %v2, %v1
  %smallest = select i1 %lhs_is_less, i16 %v1, i16 %v2
  ret i16 %smallest
}
742
; min(ushort2, ushort2): zero-extend each lane to i32, take the unsigned NEON
; minimum on the widened lanes, then truncate back to i16.
define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
entry:
  %a.wide = zext <2 x i16> %v1 to <2 x i32>
  %b.wide = zext <2 x i16> %v2 to <2 x i32>
  %min.wide = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a.wide, <2 x i32> %b.wide) nounwind readnone
  %min = trunc <2 x i32> %min.wide to <2 x i16>
  ret <2 x i16> %min
}
750
; min(ushort3, ushort3): zero-extend to i32, pad to four lanes so the 128-bit
; unsigned NEON minimum can be used, then drop the padding lane and truncate.
define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
entry:
  %a.wide = zext <3 x i16> %v1 to <3 x i32>
  %b.wide = zext <3 x i16> %v2 to <3 x i32>
  ; Lane 3 of the padded operands is undefined; it is never read back.
  %a.pad = shufflevector <3 x i32> %a.wide, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %b.pad = shufflevector <3 x i32> %b.wide, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min.pad = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a.pad, <4 x i32> %b.pad) nounwind readnone
  %min.wide = shufflevector <4 x i32> %min.pad, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %min = trunc <3 x i32> %min.wide to <3 x i16>
  ret <3 x i16> %min
}
761
; min(ushort4, ushort4): lane-wise unsigned 16-bit minimum.
;
; <4 x i16> is a native 64-bit NEON vector and the file already declares the
; matching 16-bit unsigned minimum intrinsic, so the operands are passed to it
; directly. The result is the same as zero-extending both operands to i32,
; taking the 32-bit minimum and truncating back, without the two conversions.
define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
entry:
  %min = tail call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone
  ret <4 x i16> %min
}
769
; min(uint, uint): unsigned 32-bit minimum.
define i32 @_Z3minjj(i32 %v1, i32 %v2) nounwind readnone {
entry:
  ; "%v2 > %v1" is the same predicate as "%v1 < %v2"; on a tie %v2 is returned.
  %lhs_is_less = icmp ugt i32 %v2, %v1
  %smallest = select i1 %lhs_is_less, i32 %v1, i32 %v2
  ret i32 %smallest
}
775
; min(uint2, uint2): lane-wise unsigned minimum via the 64-bit NEON intrinsic.
define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
entry:
  %min = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %min
}
780
; min(uint3, uint3): pad both operands to four lanes, take the unsigned
; 128-bit NEON minimum, and return the first three lanes.
define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
entry:
  ; Lane 3 of the padded operands is undefined; it is never read back.
  %a.pad = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %b.pad = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %min.pad = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a.pad, <4 x i32> %b.pad) nounwind readnone
  %min = shufflevector <4 x i32> %min.pad, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %min
}
788
; min(uint4, uint4): lane-wise unsigned minimum via the 128-bit NEON intrinsic.
define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
entry:
  %min = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %min
}
793
; min(unsigned long long, unsigned long long): unsigned 64-bit minimum.
define i64 @_Z3minyy(i64 %v1, i64 %v2) nounwind readnone {
entry:
  ; "%v2 > %v1" is the same predicate as "%v1 < %v2"; on a tie %v2 is returned.
  %lhs_is_less = icmp ugt i64 %v2, %v1
  %smallest = select i1 %lhs_is_less, i64 %v1, i64 %v2
  ret i64 %smallest
}
799
800; TODO:  long vector types
801
; min(float, float): forwards both operands unchanged to the scalar fmin
; overload and returns its result.
define float @_Z3minff(float %v1, float %v2) nounwind readnone {
entry:
  %smaller = tail call float @_Z4fminff(float %v1, float %v2)
  ret float %smaller
}
806
; min(float2, float2): forwards both operands unchanged to the float2 fmin
; overload and returns its result.
define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
entry:
  %smaller = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
  ret <2 x float> %smaller
}
811
; min(float2, float): forwards the vector and the scalar bound unchanged to
; the matching fmin overload and returns its result.
define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
entry:
  %smaller = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
  ret <2 x float> %smaller
}
816
; min(float3, float3): forwards both operands unchanged to the float3 fmin
; overload and returns its result.
define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
entry:
  %smaller = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
  ret <3 x float> %smaller
}
821
; min(float3, float): forwards the vector and the scalar bound unchanged to
; the matching fmin overload and returns its result.
define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
entry:
  %smaller = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
  ret <3 x float> %smaller
}
826
; min(float4, float4): forwards both operands unchanged to the float4 fmin
; overload and returns its result.
define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
entry:
  %smaller = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
  ret <4 x float> %smaller
}
831
; min(float4, float): forwards the vector and the scalar bound unchanged to
; the matching fmin overload and returns its result.
define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
entry:
  %smaller = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
  ret <4 x float> %smaller
}
836
837
838;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
839;;;;;;;;;                  YUV                   ;;;;;;;;;;
840;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
841
; Constant vectors read by rsYuvToRGBA_uchar4. The conversion works on values
; scaled by 256 (the result is shifted right by 8 at the end), one lane per
; byte of the returned uchar4.

; Per-lane multipliers applied to (U - 128).
@yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16
; Per-lane multipliers applied to (V - 128).
@yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16
; Lower clamp bound.
@yuv_0 = internal constant <4 x i32> zeroinitializer, align 16
; Upper clamp bound. Despite the name the value is 65535, because the clamp is
; applied before the final shift right by 8.
@yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16
846
847
; uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v)
;
; Converts one YUV sample to a uchar4 using integer arithmetic scaled by 256:
;
;   lane = (298 * (Y - 16) + yuv_U[lane] * (U - 128) + yuv_V[lane] * (V - 128) + 128) >> 8
;
; with the sum clamped to [0, 65535] before the shift, so every output byte
; ends up in [0, 255].
;
; Two defects of the earlier version are fixed here:
;   * Lane 3 (alpha) used to carry the scaled luma term, because the U and V
;     multipliers for that lane are zero. It is now forced to 65280 (255 << 8),
;     so the returned alpha is always 255.
;   * The +128 rounding term is added before the shift, so results are rounded
;     to nearest instead of truncated.
define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline {
  %_sy = zext i8 %pY to i32
  %_su = zext i8 %pU to i32
  %_sv = zext i8 %pV to i32

  ; Remove the offsets, scale luma by 298 and fold in the rounding constant.
  %_sy2 = add i32 -16, %_sy
  %_sy3 = mul i32 298, %_sy2
  %_sy4 = add i32 128, %_sy3
  %_su2 = add i32 -128, %_su
  %_sv2 = add i32 -128, %_sv

  ; Broadcast the scalars; lane 3 of the luma vector is replaced by the fixed
  ; alpha value (255 << 8). The U and V multipliers for lane 3 are zero, so
  ; nothing is added to it below.
  %_ys = tail call <4 x i32> @smear_4i32(i32 %_sy4) nounwind readnone
  %_y = insertelement <4 x i32> %_ys, i32 65280, i32 3
  %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone
  %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone

  %mu = load <4 x i32>* @yuv_U, align 8
  %mv = load <4 x i32>* @yuv_V, align 8
  %_u2 = mul <4 x i32> %_u, %mu
  %_v2 = mul <4 x i32> %_v, %mv
  %_y2 = add <4 x i32> %_y, %_u2
  %_y3 = add <4 x i32> %_y2, %_v2

  ; Clamp to [0, 65535] while still scaled, then drop the 8 fractional bits.
  %c0 = load <4 x i32>* @yuv_0, align 8
  %c255 = load <4 x i32>* @yuv_255, align 8
  %r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone
  %r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone
  %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8>
  %r4 = trunc <4 x i32> %r3 to <4 x i8>
  ret <4 x i8> %r4
}
880
881;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
882;;;;;;;;;              half_RECIP              ;;;;;;;;;;
883;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
884
; half_recip(float2): approximate 1/v per lane. Starts from the NEON
; reciprocal estimate and applies two refinement rounds, each multiplying the
; current approximation by the vrecps correction factor for it.
define <2 x float> @_Z10half_recipDv2_f(<2 x float> %v) nounwind readnone {
entry:
  %est = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone

  ; First refinement round.
  %fix1 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %est, <2 x float> %v) nounwind readnone
  %approx1 = fmul <2 x float> %est, %fix1

  ; Second refinement round.
  %fix2 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %approx1, <2 x float> %v) nounwind readnone
  %approx2 = fmul <2 x float> %fix2, %approx1
  ret <2 x float> %approx2
}
893
; half_recip(float4): approximate 1/v per lane. Starts from the NEON
; reciprocal estimate and applies two refinement rounds, each multiplying the
; current approximation by the vrecps correction factor for it.
define <4 x float> @_Z10half_recipDv4_f(<4 x float> %v) nounwind readnone {
entry:
  %est = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone

  ; First refinement round.
  %fix1 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %est, <4 x float> %v) nounwind readnone
  %approx1 = fmul <4 x float> %est, %fix1

  ; Second refinement round.
  %fix2 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %approx1, <4 x float> %v) nounwind readnone
  %approx2 = fmul <4 x float> %fix2, %approx1
  ret <4 x float> %approx2
}
902
; half_recip(float3): pads the input to four lanes, reuses the float4
; implementation, and returns the first three lanes of its result.
define <3 x float> @_Z10half_recipDv3_f(<3 x float> %v) nounwind readnone {
entry:
  ; Lane 3 of the padded vector is undefined; its result is discarded.
  %v.pad = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %recip.pad = tail call <4 x float> @_Z10half_recipDv4_f(<4 x float> %v.pad) nounwind readnone
  %recip = shufflevector <4 x float> %recip.pad, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %recip
}
909
910
911;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
912;;;;;;;;;              half_RSQRT              ;;;;;;;;;;
913;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
914
; half_rsqrt(float): approximate 1/sqrt(v) from the NEON reciprocal square
; root estimate, with no refinement step.
;
; The scalar is placed in lane 0 of a two-lane vector (lane 1 stays undefined
; and is never read), the 64-bit estimate intrinsic is applied, and lane 0 of
; the result is returned.
;
; The function touches no memory and cannot unwind, so it carries the same
; `nounwind readnone` attributes as the vector half_rsqrt overloads.
define float @_Z10half_rsqrtf(float %v) nounwind readnone {
entry:
  %v.vec = insertelement <2 x float> undef, float %v, i32 0
  %est.vec = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v.vec) nounwind readnone
  %est = extractelement <2 x float> %est.vec, i32 0
  ret float %est
}
921
; half_rsqrt(float2): per-lane NEON reciprocal square root estimate, returned
; without any refinement step.
define <2 x float> @_Z10half_rsqrtDv2_f(<2 x float> %v) nounwind readnone {
entry:
  %est = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
  ret <2 x float> %est
}
926
; half_rsqrt(float3): pads the input to four lanes, applies the 128-bit NEON
; reciprocal square root estimate, and returns the first three lanes.
define <3 x float> @_Z10half_rsqrtDv3_f(<3 x float> %v) nounwind readnone {
entry:
  ; Lane 3 of the padded vector is undefined; its result is discarded.
  %v.pad = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %est.pad = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v.pad) nounwind readnone
  %est = shufflevector <4 x float> %est.pad, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %est
}
933
; half_rsqrt(float4): per-lane NEON reciprocal square root estimate, returned
; without any refinement step.
define <4 x float> @_Z10half_rsqrtDv4_f(<4 x float> %v) nounwind readnone {
entry:
  %est = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
  ret <4 x float> %est
}
938
939;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
940;;;;;;;;;              matrix                    ;;;;;;;;;;
941;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
942
; Matrix types: each wraps a flat array of floats (16, 9 and 4 entries).
%struct.rs_matrix4x4 = type { [16 x float] }
%struct.rs_matrix3x3 = type { [9 x float] }
%struct.rs_matrix2x2 = type { [4 x float] }

; NEON load of four floats from a byte pointer; the i32 operand is the
; alignment of the address in bytes.
declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
948
; Broadcasts a scalar float into all four lanes of a vector.
define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
entry:
  ; Put the scalar in lane 0, then replicate lane 0 into every lane.
  %lane0 = insertelement <4 x float> undef, float %in, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
956
957
; float3 rsMatrixMultiply(const rs_matrix3x3 *m, float3 in)
;
; Returns in.x * m[0..2] + in.y * m[3..5] + in.z * m[6..8], where m[i] is the
; i-th float of the matrix. The arithmetic is done on four-lane vectors; only
; lanes 0-2 of the sum are returned.
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
entry:
  ; Broadcast each input component across a full vector.
  %in.x = extractelement <3 x float> %in, i32 0
  %in.y = extractelement <3 x float> %in, i32 1
  %in.z = extractelement <3 x float> %in, i32 2
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone

  ; Floats 0..3 of the matrix (lane 3 is surplus).
  %m0.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
  %m0.raw = bitcast float* %m0.ptr to i8*
  %row.x = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %m0.raw, i32 4) nounwind

  ; Floats 3..6 of the matrix (lane 3 is surplus).
  %m3.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
  %m3.raw = bitcast float* %m3.ptr to i8*
  %row.y = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %m3.raw, i32 4) nounwind

  ; The last group starts at float 6, but a four-float load from there would
  ; run past the nine-float array. Load floats 5..8 instead and move lanes
  ; 1..3 down by one; the new lane 3 is undefined and is discarded below.
  %m5.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5
  %m5.raw = bitcast float* %m5.ptr to i8*
  %tail = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %m5.raw, i32 4) nounwind
  %row.z = shufflevector <4 x float> %tail, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>

  ; Accumulate the three scaled groups.
  %term.x = fmul <4 x float> %xxxx, %row.x
  %term.y = fmul <4 x float> %yyyy, %row.y
  %sum.xy = fadd <4 x float> %term.x, %term.y
  %term.z = fmul <4 x float> %zzzz, %row.z
  %sum.xyz = fadd <4 x float> %term.z, %sum.xy
  %out = shufflevector <4 x float> %sum.xyz, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %out
}
987
; float3 rsMatrixMultiply(const rs_matrix3x3 *m, float2 in)
;
; Returns in.x * m[0..2] + in.y * m[3..5], where m[i] is the i-th float of the
; matrix. The arithmetic is done on four-lane vectors; only lanes 0-2 of the
; sum are returned.
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
entry:
  ; Broadcast each input component across a full vector.
  %in.x = extractelement <2 x float> %in, i32 0
  %in.y = extractelement <2 x float> %in, i32 1
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone

  ; Floats 0..3 of the matrix (lane 3 is surplus).
  %m0.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
  %m0.vec = bitcast float* %m0.ptr to <4 x float>*
  %row.x = load <4 x float>* %m0.vec, align 4

  ; Floats 3..6 of the matrix (lane 3 is surplus).
  %m3.ptr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
  %m3.vec = bitcast float* %m3.ptr to <4 x float>*
  %row.y = load <4 x float>* %m3.vec, align 4

  %term.x = fmul <4 x float> %xxxx, %row.x
  %term.y = fmul <4 x float> %yyyy, %row.y
  %sum = fadd <4 x float> %term.x, %term.y
  %out = shufflevector <4 x float> %sum, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %out
}
1007
; float4 rsMatrixMultiply(const rs_matrix4x4 *m, float4 in)
;
; Returns in.x * m[0..3] + in.y * m[4..7] + in.z * m[8..11] + in.w * m[12..15],
; where m[i] is the i-th float of the matrix.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
entry:
  ; Broadcast each input component across a full vector.
  %in.x = extractelement <4 x float> %in, i32 0
  %in.y = extractelement <4 x float> %in, i32 1
  %in.z = extractelement <4 x float> %in, i32 2
  %in.w = extractelement <4 x float> %in, i32 3
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone
  %wwww = tail call <4 x float> @smear_f(float %in.w) nounwind readnone

  ; Load the four groups of four floats.
  %m0.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %m0.vec = bitcast float* %m0.ptr to <4 x float>*
  %row.x = load <4 x float>* %m0.vec, align 4
  %m4.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %m4.vec = bitcast float* %m4.ptr to <4 x float>*
  %row.y = load <4 x float>* %m4.vec, align 4
  %m8.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
  %m8.vec = bitcast float* %m8.ptr to <4 x float>*
  %row.z = load <4 x float>* %m8.vec, align 4
  %m12.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %m12.vec = bitcast float* %m12.ptr to <4 x float>*
  %row.w = load <4 x float>* %m12.vec, align 4

  ; Accumulate the four scaled groups.
  %term.x = fmul <4 x float> %xxxx, %row.x
  %term.y = fmul <4 x float> %yyyy, %row.y
  %sum.xy = fadd <4 x float> %term.x, %term.y
  %term.z = fmul <4 x float> %zzzz, %row.z
  %sum.xyz = fadd <4 x float> %sum.xy, %term.z
  %term.w = fmul <4 x float> %wwww, %row.w
  %out = fadd <4 x float> %sum.xyz, %term.w
  ret <4 x float> %out
}
1040
; float4 rsMatrixMultiply(const rs_matrix4x4 *m, float3 in)
;
; Returns m[12..15] + in.x * m[0..3] + in.y * m[4..7] + in.z * m[8..11], where
; m[i] is the i-th float of the matrix. The last group is added unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
entry:
  ; Broadcast each input component across a full vector.
  %in.x = extractelement <3 x float> %in, i32 0
  %in.y = extractelement <3 x float> %in, i32 1
  %in.z = extractelement <3 x float> %in, i32 2
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone

  ; Load the four groups of four floats.
  %m0.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %m0.vec = bitcast float* %m0.ptr to <4 x float>*
  %row.x = load <4 x float>* %m0.vec, align 4
  %m4.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %m4.vec = bitcast float* %m4.ptr to <4 x float>*
  %row.y = load <4 x float>* %m4.vec, align 4
  %m8.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
  %m8.vec = bitcast float* %m8.ptr to <4 x float>*
  %row.z = load <4 x float>* %m8.vec, align 4
  %m12.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %m12.vec = bitcast float* %m12.ptr to <4 x float>*
  %row.w = load <4 x float>* %m12.vec, align 4

  ; Start from the unscaled last group and add the three scaled ones.
  %term.x = fmul <4 x float> %xxxx, %row.x
  %sum.wx = fadd <4 x float> %row.w, %term.x
  %term.y = fmul <4 x float> %yyyy, %row.y
  %sum.wxy = fadd <4 x float> %sum.wx, %term.y
  %term.z = fmul <4 x float> %zzzz, %row.z
  %out = fadd <4 x float> %sum.wxy, %term.z
  ret <4 x float> %out
}
1070
; float4 rsMatrixMultiply(const rs_matrix4x4 *m, float2 in)
;
; Returns m[12..15] + in.x * m[0..3] + in.y * m[4..7], where m[i] is the i-th
; float of the matrix. Floats 8..11 are not read and the last group is added
; unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
entry:
  ; Broadcast each input component across a full vector.
  %in.x = extractelement <2 x float> %in, i32 0
  %in.y = extractelement <2 x float> %in, i32 1
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone

  ; Load the three groups that contribute to the result.
  %m0.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %m0.vec = bitcast float* %m0.ptr to <4 x float>*
  %row.x = load <4 x float>* %m0.vec, align 4
  %m4.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %m4.vec = bitcast float* %m4.ptr to <4 x float>*
  %row.y = load <4 x float>* %m4.vec, align 4
  %m12.ptr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %m12.vec = bitcast float* %m12.ptr to <4 x float>*
  %row.w = load <4 x float>* %m12.vec, align 4

  ; Start from the unscaled last group and add the two scaled ones.
  %term.x = fmul <4 x float> %xxxx, %row.x
  %sum.wx = fadd <4 x float> %row.w, %term.x
  %term.y = fmul <4 x float> %yyyy, %row.y
  %out = fadd <4 x float> %sum.wx, %term.y
  ret <4 x float> %out
}
1093
1094
1095
1096;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1097;;;;;;;;;              pixel ops                 ;;;;;;;;;;
1098;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1099
1100
; Splat constants read by rsPackColorTo8888: the scale factor and upper clamp
; bound (255.0), the bias added before conversion (0.5), and the lower clamp
; bound (0.0).
@fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, align 16
@fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
@fc_0 = internal constant <4 x float> zeroinitializer, align 16

; Conversion helpers defined outside this section.
declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float>) nounwind readnone
declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8>) nounwind readnone
1107
; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
;
; Scales every channel by 255, adds 0.5, clamps the result to [0, 255] with
; the float4 clamp overload and converts it with convert_uchar4.
define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
entry:
  %scale = load <4 x float>* @fc_255.0, align 16
  %bias = load <4 x float>* @fc_0.5, align 16
  %floor = load <4 x float>* @fc_0, align 16

  %scaled = fmul <4 x float> %scale, %color
  %biased = fadd <4 x float> %bias, %scaled
  ; The scale vector doubles as the upper clamp bound.
  %clamped = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %biased, <4 x float> %floor, <4 x float> %scale) nounwind readnone
  %packed = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %clamped) nounwind readnone
  ret <4 x i8> %packed
}
1119
; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
;
; Extends the colour to four channels with 1.0 in lane 3 and packs it with the
; float4 overload.
define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
entry:
  ; Lane 3 is undefined after the widening shuffle and is overwritten next.
  %rgb.pad = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %rgba = insertelement <4 x float> %rgb.pad, float 1.0, i32 3
  %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %rgba) nounwind readnone
  ret <4 x i8> %packed
}
1127
; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
;
; Assembles <r, g, b, 1.0> and packs it with the float4 overload.
define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
entry:
  %c.r = insertelement <4 x float> undef, float %r, i32 0
  %c.rg = insertelement <4 x float> %c.r, float %g, i32 1
  %c.rgb = insertelement <4 x float> %c.rg, float %b, i32 2
  ; No alpha argument: lane 3 is set to 1.0.
  %c.rgba = insertelement <4 x float> %c.rgb, float 1.0, i32 3
  %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %c.rgba) nounwind readnone
  ret <4 x i8> %packed
}
1137
; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
;
; Assembles <r, g, b, a> and packs it with the float4 overload.
define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnone {
entry:
  %c.r = insertelement <4 x float> undef, float %r, i32 0
  %c.rg = insertelement <4 x float> %c.r, float %g, i32 1
  %c.rgb = insertelement <4 x float> %c.rg, float %b, i32 2
  %c.rgba = insertelement <4 x float> %c.rgb, float %a, i32 3
  %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %c.rgba) nounwind readnone
  ret <4 x i8> %packed
}
1147
1148