neon.ll revision ba92a7085bbb8916334a6571ff33355873883173
; NEON-backed implementations of clamp / fmax / fmin / max for 32-bit ARM.
;
; Public symbols use Itanium C++ name mangling, e.g.
;   _Z5clampDv4_fS_S_  ->  clamp(<4 x float>, <4 x float>, <4 x float>)
;   _Z5clampDv4_fff    ->  clamp(<4 x float>, float, float)
; In the mangled names: c/h = signed/unsigned 8-bit, s/t = signed/unsigned
; 16-bit, i/j = signed/unsigned 32-bit, x/y = signed/unsigned 64-bit, f = float.
;
; 3-element vectors are widened to 4 lanes with shufflevector before the
; 128-bit NEON operation and narrowed back afterwards.  Mask index 3 selects
; lane 0 of the undef second operand, so the extra lane is undefined and is
; discarded by the final narrowing shuffle.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; INTRINSICS ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Element-wise maximum (vmaxs = signed / float, vmaxu = unsigned).
declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone

; Element-wise minimum (vmins = signed / float, vminu = unsigned).
declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone

; Narrowing shift intrinsics (result elements are half the width of the inputs).
declare <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

; Reciprocal / reciprocal-square-root estimate intrinsics (one operand).
declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone

; Reciprocal / reciprocal-square-root step intrinsics (two operands).
declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; HELPERS ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; The smear_* helpers broadcast a scalar into every lane of a vector.
; They are internal and alwaysinline.

; Broadcast a float into all 4 lanes.
define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x float> undef, float %in, i32 0
  %2 = insertelement <4 x float> %1, float %in, i32 1
  %3 = insertelement <4 x float> %2, float %in, i32 2
  %4 = insertelement <4 x float> %3, float %in, i32 3
  ret <4 x float> %4
}

; Broadcast an i32 into all 4 lanes.
define internal <4 x i32> @smear_4i(i32 %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x i32> undef, i32 %in, i32 0
  %2 = insertelement <4 x i32> %1, i32 %in, i32 1
  %3 = insertelement <4 x i32> %2, i32 %in, i32 2
  %4 = insertelement <4 x i32> %3, i32 %in, i32 3
  ret <4 x i32> %4
}

; Broadcast an i16 into all 4 lanes.
define internal <4 x i16> @smear_4s(i16 %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x i16> undef, i16 %in, i32 0
  %2 = insertelement <4 x i16> %1, i16 %in, i32 1
  %3 = insertelement <4 x i16> %2, i16 %in, i32 2
  %4 = insertelement <4 x i16> %3, i16 %in, i32 3
  ret <4 x i16> %4
}



; Broadcast a float into both lanes.
define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
  %1 = insertelement <2 x float> undef, float %in, i32 0
  %2 = insertelement <2 x float> %1, float %in, i32 1
  ret <2 x float> %2
}

; Broadcast an i32 into both lanes.
define internal <2 x i32> @smear_2i(i32 %in) nounwind readnone alwaysinline {
  %1 = insertelement <2 x i32> undef, i32 %in, i32 0
  %2 = insertelement <2 x i32> %1, i32 %in, i32 1
  ret <2 x i32> %2
}

; Broadcast an i16 into both lanes.
define internal <2 x i16> @smear_2s(i16 %in) nounwind readnone alwaysinline {
  %1 = insertelement <2 x i16> undef, i16 %in, i32 0
  %2 = insertelement <2 x i16> %1, i16 %in, i32 1
  ret <2 x i16> %2
}


; Broadcast an i32 into all 4 lanes.
; NOTE(review): body is identical to @smear_4i; kept because callers outside
; this part of the file may reference it.
define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
  %1 = insertelement <4 x i32> undef, i32 %in, i32 0
  %2 = insertelement <4 x i32> %1, i32 %in, i32 1
  %3 = insertelement <4 x i32> %2, i32 %in, i32 2
  %4 = insertelement <4 x i32> %3, i32 %in, i32 3
  ret <4 x i32> %4
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; CLAMP ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Every clamp overload computes max(min(value, high), low) per element.
; Overloads taking scalar bounds broadcast them with the smear_* helpers first.

; ---- float ----

; clamp(<4 x float> value, <4 x float> low, <4 x float> high)
define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readonly {
  %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind readnone
  ret <4 x float> %2
}

; clamp(<4 x float> value, float low, float high); forwards to the vector overload.
define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
  %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float> %_high) nounwind readonly
  ret <4 x float> %out
}

; clamp(<3 x float> value, <3 x float> low, <3 x float> high)
define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readonly {
  ; Widen all operands to 4 lanes (lane 3 undefined).
  %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
  %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
  ; Keep only the three meaningful lanes.
  %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; clamp(<3 x float> value, float low, float high)
define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
  %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
  %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
  %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; clamp(<2 x float> value, <2 x float> low, <2 x float> high)
define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readonly {
  %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind readnone
  ret <2 x float> %2
}

; clamp(<2 x float> value, float low, float high)
define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
  %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
  %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
  %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) nounwind readnone
  %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
  ret <2 x float> %b
}

; clamp(float value, float low, float high); scalar compare/select, no NEON.
define float @_Z5clampfff(float %value, float %low, float %high) nounwind readonly {
  %1 = fcmp olt float %value, %high
  %2 = select i1 %1, float %value, float %high
  %3 = fcmp ogt float %2, %low
  %4 = select i1 %3, float %2, float %low
  ret float %4
}



; ---- signed 32-bit integer ----

; clamp(<4 x i32> value, <4 x i32> low, <4 x i32> high), signed.
define <4 x i32> @_Z5clampDv4_iS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind readonly {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(<4 x i32> value, i32 low, i32 high), signed.
define <4 x i32> @_Z5clampDv4_iii(<4 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(<3 x i32> value, <3 x i32> low, <3 x i32> high), signed.
define <3 x i32> @_Z5clampDv3_iS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind readonly {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(<3 x i32> value, i32 low, i32 high), signed.
define <3 x i32> @_Z5clampDv3_iii(<3 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %a = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(<2 x i32> value, <2 x i32> low, <2 x i32> high), signed.
define <2 x i32> @_Z5clampDv2_iS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind readonly {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind readnone
  %2 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind readnone
  ret <2 x i32> %2
}

; clamp(<2 x i32> value, i32 low, i32 high), signed.
define <2 x i32> @_Z5clampDv2_iii(<2 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
  %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
  %a = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwind readnone
  %b = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind readnone
  ret <2 x i32> %b
}



; ---- unsigned 32-bit integer ----

; clamp(<4 x i32> value, <4 x i32> low, <4 x i32> high), unsigned.
define <4 x i32> @_Z5clampDv4_jS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind readonly {
  %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(<4 x i32> value, i32 low, i32 high), unsigned.
define <4 x i32> @_Z5clampDv4_jjj(<4 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(<3 x i32> value, <3 x i32> low, <3 x i32> high), unsigned.
define <3 x i32> @_Z5clampDv3_jS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind readonly {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(<3 x i32> value, i32 low, i32 high), unsigned.
define <3 x i32> @_Z5clampDv3_jjj(<3 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %a = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(<2 x i32> value, <2 x i32> low, <2 x i32> high), unsigned.
define <2 x i32> @_Z5clampDv2_jS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind readonly {
  %1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind readnone
  %2 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind readnone
  ret <2 x i32> %2
}

; clamp(<2 x i32> value, i32 low, i32 high), unsigned.
define <2 x i32> @_Z5clampDv2_jjj(<2 x i32> %value, i32 %low, i32 %high) nounwind readonly {
  %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
  %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
  %a = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwind readnone
  %b = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind readnone
  ret <2 x i32> %b
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; FMAX ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; fmax(<4 x float>, <4 x float>)
define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
  %1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

; fmax(<4 x float>, float); the scalar is broadcast to all lanes.
define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
  %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
  ret <4 x float> %2
}

; fmax(<3 x float>, <3 x float>); widened to 4 lanes and narrowed back.
define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %4
}

; fmax(<3 x float>, float)
define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; fmax(<2 x float>, <2 x float>)
define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
  %1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

; fmax(<2 x float>, float)
define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
  %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

; fmax(float, float); ordered compare, so %v2 is returned when either input is NaN.
; NOTE(review): marked readonly while @_Z4fminff is readnone; the body
; touches no memory, so the two could be made consistent.
define float @_Z4fmaxff(float %v1, float %v2) nounwind readonly {
  %1 = fcmp ogt float %v1, %v2
  %2 = select i1 %1, float %v1, float %v2
  ret float %2
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; FMIN ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; fmin(<4 x float>, <4 x float>)
define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
  %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

; fmin(<4 x float>, float); the scalar is broadcast to all lanes.
define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
  %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
  ret <4 x float> %2
}

; fmin(<3 x float>, <3 x float>); widened to 4 lanes and narrowed back.
define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %4
}

; fmin(<3 x float>, float)
define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; fmin(<2 x float>, <2 x float>)
define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
  %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

; fmin(<2 x float>, float)
define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
  %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

; fmin(float, float); ordered compare, so %v2 is returned when either input is NaN.
define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
  %1 = fcmp olt float %v1, %v2
  %2 = select i1 %1, float %v1, float %v2
  ret float %2
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; MAX ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Integer max overloads.  Scalars use icmp + select.  8- and 16-bit vectors
; are extended to i32 lanes (sext for signed, zext for unsigned), reduced with
; the 32-bit NEON max, and truncated back to the original element width.

; ---- signed 8-bit ----

define signext i8 @_Z3maxcc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %1 = icmp sgt i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = sext <2 x i8> %v1 to <2 x i32>
  %2 = sext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = sext <3 x i8> %v1 to <3 x i32>
  %2 = sext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = sext <4 x i8> %v1 to <4 x i32>
  %2 = sext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; ---- signed 16-bit ----

define signext i16 @_Z3maxss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %1 = icmp sgt i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = sext <2 x i16> %v1 to <2 x i32>
  %2 = sext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = sext <3 x i16> %v1 to <3 x i32>
  %2 = sext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = sext <4 x i16> %v1 to <4 x i32>
  %2 = sext <4 x i16> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i16>
  ret <4 x i16> %4
}

; ---- signed 32-bit ----

define i32 @_Z3maxii(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp sgt i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %4
}

define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %1
}

; ---- signed 64-bit ----

define i64 @_Z3maxxx(i64 %v1, i64 %v2) nounwind readnone {
  %1 = icmp sgt i64 %v1, %v2
  %2 = select i1 %1, i64 %v1, i64 %v2
  ret i64 %2
}

; TODO: long vector types

; ---- unsigned 8-bit ----

define zeroext i8 @_Z3maxhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %1 = icmp ugt i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = zext <2 x i8> %v1 to <2 x i32>
  %2 = zext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = zext <3 x i8> %v1 to <3 x i32>
  %2 = zext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = zext <4 x i8> %v1 to <4 x i32>
  %2 = zext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; ---- unsigned 16-bit ----

define zeroext i16 @_Z3maxtt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %1 = icmp ugt i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = zext <2 x i16> %v1 to <2 x i32>
  %2 = zext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = zext <3 x i16> %v1 to <3 x i32>
  %2 = zext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = zext <4 x i16> %v1 to <4 x i32>
  %2 = zext <4 x i16> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i16>
  ret <4 x i16> %4
}

; ---- unsigned 32-bit ----

define i32 @_Z3maxjj(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp ugt i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %4
}

define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %1
}

; ---- unsigned 64-bit ----

define i64 @_Z3maxyy(i64 %v1, i64 %v2) nounwind readnone {
  %1 = icmp ugt i64 %v1, %v2
  %2 = select i1 %1, i64 %v1, i64 %v2
  ret i64 %2
}

; TODO: long vector types

; ---- float: each max overload forwards to the matching fmax overload ----

define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
  %1 = tail call float @_Z4fmaxff(float %v1, float %v2)
  ret float %1
}

define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
  ret <2 x float> %1
}

define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
  ret <2 x float> %1
}

define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
  ret <3 x float> %1
}

define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
  ret <3 x float> %1
}

define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
  ret <4 x float> %1
}

define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
  ret <4 x float> %1
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; MIN ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Integer min overloads.  Scalars use icmp + select.  8- and 16-bit vectors
; are extended to i32 lanes (sext for signed, zext for unsigned), reduced with
; the 32-bit NEON min, and truncated back to the original element width.
; 3-element vectors are widened to 4 lanes first; mask index 3 picks lane 0 of
; the undef operand, and that lane is dropped by the final narrowing shuffle.

; ---- signed 8-bit ----

define signext i8 @_Z3mincc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %1 = icmp slt i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = sext <2 x i8> %v1 to <2 x i32>
  %2 = sext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = sext <3 x i8> %v1 to <3 x i32>
  %2 = sext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = sext <4 x i8> %v1 to <4 x i32>
  %2 = sext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; ---- signed 16-bit ----

define signext i16 @_Z3minss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %1 = icmp slt i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = sext <2 x i16> %v1 to <2 x i32>
  %2 = sext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = sext <3 x i16> %v1 to <3 x i32>
  %2 = sext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = sext <4 x i16> %v1 to <4 x i32>
  %2 = sext <4 x i16> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i16>
  ret <4 x i16> %4
}

; ---- signed 32-bit ----

define i32 @_Z3minii(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp slt i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %4
}

define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %1
}

; ---- signed 64-bit ----

define i64 @_Z3minxx(i64 %v1, i64 %v2) nounwind readnone {
  %1 = icmp slt i64 %v1, %v2
  %2 = select i1 %1, i64 %v1, i64 %v2
  ret i64 %2
}

; TODO: long vector types

; ---- unsigned 8-bit ----

define zeroext i8 @_Z3minhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %1 = icmp ult i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = zext <2 x i8> %v1 to <2 x i32>
  %2 = zext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = zext <3 x i8> %v1 to <3 x i32>
  %2 = zext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = zext <4 x i8> %v1 to <4 x i32>
  %2 = zext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; ---- unsigned 16-bit ----

define zeroext i16 @_Z3mintt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %1 = icmp ult i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = zext <2 x i16> %v1 to <2 x i32>
  %2 = zext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = zext <3 x i16> %v1 to <3 x i32>
  %2 = zext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = zext <4 x i16> %v1 to <4 x i32>
  %2 = zext <4 x i16> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i16>
  ret <4 x i16> %4
}

; ---- unsigned 32-bit ----

define i32 @_Z3minjj(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp ult i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x
i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 786 ret <3 x i32> %4 787} 788 789define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone { 790 %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone 791 ret <4 x i32> %1 792} 793 794define i64 @_Z3minyy(i64 %v1, i64 %v2) nounwind readnone { 795 %1 = icmp ult i64 %v1, %v2 796 %2 = select i1 %1, i64 %v1, i64 %v2 797 ret i64 %2 798} 799 800; TODO: long vector types 801 802define float @_Z3minff(float %v1, float %v2) nounwind readnone { 803 %1 = tail call float @_Z4fminff(float %v1, float %v2) 804 ret float %1 805} 806 807define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone { 808 %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) 809 ret <2 x float> %1 810} 811 812define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone { 813 %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) 814 ret <2 x float> %1 815} 816 817define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone { 818 %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) 819 ret <3 x float> %1 820} 821 822define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone { 823 %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) 824 ret <3 x float> %1 825} 826 827define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone { 828 %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) 829 ret <4 x float> %1 830} 831 832define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone { 833 %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) 834 ret <4 x float> %1 835} 836 837 838;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 839;;;;;;;;; YUV ;;;;;;;;;; 840;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 841 
; Per-lane multipliers for the (U - 128) term of rsYuvToRGBA_uchar4.
@yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16
; Per-lane multipliers for the (V - 128) term of rsYuvToRGBA_uchar4.
@yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16
; Lower clamp bound for the fixed-point result.
@yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
; Upper clamp bound for the fixed-point result; the clamp is applied before the
; final right shift by 8, hence 65535 rather than 255.
@yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16


; uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v)
;
; Fixed-point YUV -> RGBA conversion with 8 fractional bits. Per lane the
; function computes
;     clamp(298 * (Y - 16) + yuv_U[lane] * (U - 128) + yuv_V[lane] * (V - 128),
;           0, 65535) >> 8
; and truncates each lane to 8 bits.
;
; NOTE(review): this description assumes @smear_4i32 (defined elsewhere in the
; file) broadcasts its scalar argument to all four lanes, as @smear_f does for
; floats - confirm.
; NOTE(review): lane 3 of both multiplier tables is 0, so lane 3 of the result
; is derived from the luma term only - confirm that this is the intended alpha.
define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline {
    ; Widen the 8-bit samples to 32 bits before doing any arithmetic.
    %y32 = zext i8 %pY to i32
    %u32 = zext i8 %pU to i32
    %v32 = zext i8 %pV to i32

    ; Remove the offsets (16 for luma, 128 for chroma) and scale the luma.
    %y_off = add i32 -16, %y32
    %y_scaled = mul i32 298, %y_off
    %u_off = add i32 -128, %u32
    %v_off = add i32 -128, %v32
    %y_vec = tail call <4 x i32> @smear_4i32(i32 %y_scaled) nounwind readnone
    %u_vec = tail call <4 x i32> @smear_4i32(i32 %u_off) nounwind readnone
    %v_vec = tail call <4 x i32> @smear_4i32(i32 %v_off) nounwind readnone

    ; The tables are defined with align 16 in this module, so the loads may
    ; state that alignment.
    %u_coeff = load <4 x i32>* @yuv_U, align 16
    %v_coeff = load <4 x i32>* @yuv_V, align 16
    %u_term = mul <4 x i32> %u_vec, %u_coeff
    %v_term = mul <4 x i32> %v_vec, %v_coeff
    %yu_sum = add <4 x i32> %y_vec, %u_term
    %sum = add <4 x i32> %yu_sum, %v_term

    ; Alternative kept for reference (saturating narrowing shift instead of
    ; the explicit clamp below):
    ;   %r1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %sum, <4 x i32> <i32 8, i32 8, i32 8, i32 8>) nounwind readnone
    ;   %r2 = trunc <4 x i16> %r1 to <4 x i8>
    ;   ret <4 x i8> %r2

    ; Clamp to [0, 65535], then drop the 8 fractional bits.
    %lower = load <4 x i32>* @yuv_0, align 16
    %upper = load <4 x i32>* @yuv_255, align 16
    %clamped_lo = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %sum, <4 x i32> %lower) nounwind readnone
    %clamped = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %clamped_lo, <4 x i32> %upper) nounwind readnone
    %shifted = lshr <4 x i32> %clamped, <i32 8, i32 8, i32 8, i32 8>
    %rgba = trunc <4 x i32> %shifted to <4 x i8>
    ret <4 x i8> %rgba
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; half_RECIP ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; float2 half_recip(float2)
;
; Starts from the NEON reciprocal estimate (vrecpe) and applies two refinement
; rounds; each round multiplies the current estimate by the vrecps result for
; (estimate, v).
define <2 x float> @_Z10half_recipDv2_f(<2 x float> %v) nounwind readnone {
    %est0 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
    %step0 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %est0, <2 x float> %v) nounwind readnone
    %est1 = fmul <2 x float> %est0, %step0
    %step1 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %est1, <2 x float> %v) nounwind readnone
    %est2 = fmul <2 x float> %step1, %est1
    ret <2 x float> %est2
}

; float4 half_recip(float4): same scheme as the float2 overload, on 4 lanes.
define <4 x float> @_Z10half_recipDv4_f(<4 x float> %v) nounwind readnone {
    %est0 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
    %step0 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %est0, <4 x float> %v) nounwind readnone
    %est1 = fmul <4 x float> %est0, %step0
    %step1 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %est1, <4 x float> %v) nounwind readnone
    %est2 = fmul <4 x float> %step1, %est1
    ret <4 x float> %est2
}

; float3 half_recip(float3): pads to 4 lanes (mask index 3 selects lane 0 of
; the undef operand), reuses the float4 overload, and keeps lanes 0..2.
define <3 x float> @_Z10half_recipDv3_f(<3 x float> %v) nounwind readnone {
    %v_quad = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %recip_quad = tail call <4 x float> @_Z10half_recipDv4_f(<4 x float> %v_quad) nounwind readnone
    %recip = shufflevector <4 x float> %recip_quad, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %recip
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; half_RSQRT ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; float half_rsqrt(float)
;
; Places the scalar in lane 0 of a 2-lane vector, applies the NEON reciprocal
; square-root estimate (vrsqrte) with no refinement step, and returns lane 0.
; The body has no memory access and calls only a readnone intrinsic, so it
; carries the same `nounwind readnone` attributes as the vector overloads.
define float @_Z10half_rsqrtf(float %v) nounwind readnone {
    %v_lane0 = insertelement <2 x float> undef, float %v, i32 0
    %est = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v_lane0) nounwind readnone
    %result = extractelement <2 x float> %est, i32 0
    ret float %result
}

; float2 half_rsqrt(float2): the raw vrsqrte estimate.
define <2 x float> @_Z10half_rsqrtDv2_f(<2 x float> %v) nounwind readnone {
    %est = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
    ret <2 x float> %est
}

; float3 half_rsqrt(float3): pad to 4 lanes, estimate, keep lanes 0..2.
define <3 x float> @_Z10half_rsqrtDv3_f(<3 x float> %v) nounwind readnone {
    %v_quad = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %est_quad = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v_quad) nounwind readnone
    %est = shufflevector <4 x float> %est_quad, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %est
}

; float4 half_rsqrt(float4): the raw vrsqrte estimate.
define <4 x float> @_Z10half_rsqrtDv4_f(<4 x float> %v) nounwind readnone {
    %est = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
    ret <4 x float> %est
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; matrix ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly

; Matrix types: each is a flat float array of 16, 9 or 4 elements.
%struct.rs_matrix4x4 = type { [16 x float] }
%struct.rs_matrix3x3 = type { [9 x float] }
%struct.rs_matrix2x2 = type { [4 x float] }

; Broadcasts a scalar float into all four lanes of a vector.
define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
    %lane0 = insertelement <4 x float> undef, float %in, i32 0
    %lane01 = insertelement <4 x float> %lane0, float %in, i32 1
    %lane012 = insertelement <4 x float> %lane01, float %in, i32 2
    %all = insertelement <4 x float> %lane012, float %in, i32 3
    ret <4 x float> %all
}


; float3 rsMatrixMultiply(const rs_matrix3x3 *m, float3 in)
;
; Result lane i (i = 0..2) is in.x * m[i] + in.y * m[3 + i] + in.z * m[6 + i].
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
    %x0 = extractelement <3 x float> %in, i32 0
    %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    %y0 = extractelement <3 x float> %in, i32 1
    %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
    %z0 = extractelement <3 x float> %in, i32 2
    %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone

    ; Elements 0..3; lane 3 is discarded at the end.
    %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
    %px2 = bitcast float* %px to i8*
    %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind

    ; Elements 3..6; lane 3 is discarded at the end.
    %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
    %py2 = bitcast float* %py to i8*
    %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind

    ; The needed elements are 6..8, but a 4-wide load starting at element 6
    ; would run past the 9-float array. Load elements 5..8 instead and shift
    ; them down one lane (mask index 4 selects lane 0 of the undef operand).
    %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5
    %pz2 = bitcast float* %pz to i8*
    %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind
    %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>

    %a1 = fmul <4 x float> %x, %xm
    %a2 = fmul <4 x float> %y, %ym
    %a3 = fadd <4 x float> %a1, %a2
    %a4 = fmul <4 x float> %z, %zm
    %a5 = fadd <4 x float> %a4, %a3
    %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %a6
}

; float3 rsMatrixMultiply(const rs_matrix3x3 *m, float2 in)
;
; Result lane i (i = 0..2) is in.x * m[i] + in.y * m[3 + i].
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
    %x0 = extractelement <2 x float> %in, i32 0
    %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    %y0 = extractelement <2 x float> %in, i32 1
    %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone

    ; Elements 0..3 and 3..6; lane 3 of each product is discarded at the end.
    %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
    %px2 = bitcast float* %px to <4 x float>*
    %xm = load <4 x float>* %px2, align 4
    %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
    %py2 = bitcast float* %py to <4 x float>*
    %ym = load <4 x float>* %py2, align 4

    %a1 = fmul <4 x float> %x, %xm
    %a2 = fmul <4 x float> %y, %ym
    %a3 = fadd <4 x float> %a1, %a2
    %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    ret <3 x float> %a4
}

; float4 rsMatrixMultiply(const rs_matrix4x4 *m, float4 in)
;
; Result lane i is
;     in.x * m[i] + in.y * m[4 + i] + in.z * m[8 + i] + in.w * m[12 + i].
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
    %x0 = extractelement <4 x float> %in, i32 0
    %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    %y0 = extractelement <4 x float> %in, i32 1
    %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
    %z0 = extractelement <4 x float> %in, i32 2
    %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
    %w0 = extractelement <4 x float> %in, i32 3
    %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone

    ; Four consecutive groups of four floats: elements 0..3, 4..7, 8..11, 12..15.
    %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
    %px2 = bitcast float* %px to <4 x float>*
    %xm = load <4 x float>* %px2, align 4
    %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
    %py2 = bitcast float* %py to <4 x float>*
    %ym = load <4 x float>* %py2, align 4
    %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
    %pz2 = bitcast float* %pz to <4 x float>*
    %zm = load <4 x float>* %pz2, align 4
    %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
    %pw2 = bitcast float* %pw to <4 x float>*
    %wm = load <4 x float>* %pw2, align 4

    %a1 = fmul <4 x float> %x, %xm
    %a2 = fmul <4 x float> %y, %ym
    %a3 = fadd <4 x float> %a1, %a2
    %a4 = fmul <4 x float> %z, %zm
    %a5 = fadd <4 x float> %a3, %a4
    %a6 = fmul <4 x float> %w, %wm
    %a7 = fadd <4 x float> %a5, %a6
    ret <4 x float> %a7
}

; float4 rsMatrixMultiply(const rs_matrix4x4 *m, float3 in)
;
; Result lane i is m[12 + i] + in.x * m[i] + in.y * m[4 + i] + in.z * m[8 + i];
; elements 12..15 are added unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
    %x0 = extractelement <3 x float> %in, i32 0
    %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    %y0 = extractelement <3 x float> %in, i32 1
    %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
    %z0 = extractelement <3 x float> %in, i32 2
    %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone

    %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
    %px2 = bitcast float* %px to <4 x float>*
    %xm = load <4 x float>* %px2, align 4
    %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
    %py2 = bitcast float* %py to <4 x float>*
    %ym = load <4 x float>* %py2, align 4
    %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
    %pz2 = bitcast float* %pz to <4 x float>*
    %zm = load <4 x float>* %pz2, align 4
    %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
    %pw2 = bitcast float* %pw to <4 x float>*
    %wm = load <4 x float>* %pw2, align 4

    %a1 = fmul <4 x float> %x, %xm
    %a2 = fadd <4 x float> %wm, %a1
    %a3 = fmul <4 x float> %y, %ym
    %a4 = fadd <4 x float> %a2, %a3
    %a5 = fmul <4 x float> %z, %zm
    %a6 = fadd <4 x float> %a4, %a5
    ret <4 x float> %a6
}

; float4 rsMatrixMultiply(const rs_matrix4x4 *m, float2 in)
;
; Result lane i is m[12 + i] + in.x * m[i] + in.y * m[4 + i]; elements 8..11
; are not read and elements 12..15 are added unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
    %x0 = extractelement <2 x float> %in, i32 0
    %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    %y0 = extractelement <2 x float> %in, i32 1
    %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone

    %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
    %px2 = bitcast float* %px to <4 x float>*
    %xm = load <4 x float>* %px2, align 4
    %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
    %py2 = bitcast float* %py to <4 x float>*
    %ym = load <4 x float>* %py2, align 4
    %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
    %pw2 = bitcast float* %pw to <4 x float>*
    %wm = load <4 x float>* %pw2, align 4

    %a1 = fmul <4 x float> %x, %xm
    %a2 = fadd <4 x float> %wm, %a1
    %a3 = fmul <4 x float> %y, %ym
    %a4 = fadd <4 x float> %a2, %a3
    ret <4 x float> %a4
}



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;; pixel ops ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


; Splat constants used by rsPackColorTo8888: scale factor / upper clamp bound,
; the 0.5 bias added before clamping, and the lower clamp bound.
@fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, align 16
@fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
@fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16

declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone

; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
;
; Computes color * 255 + 0.5 per lane, clamps the result to [0, 255] through
; clamp(float4, float4, float4) and converts it with convert_uchar4().
define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
    %scale = load <4 x float>* @fc_255.0, align 16
    %half = load <4 x float>* @fc_0.5, align 16
    %zero = load <4 x float>* @fc_0, align 16
    %scaled = fmul <4 x float> %scale, %color
    %biased = fadd <4 x float> %half, %scaled
    %clamped = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %biased, <4 x float> %zero, <4 x float> %scale) nounwind readnone
    %packed = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %clamped) nounwind readnone
    ret <4 x i8> %packed
}

; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
;
; Widens the colour to four lanes, sets lane 3 to 1.0 and forwards to the
; float4 overload.
define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
    ; Mask index 3 selects lane 0 of the undef operand; it is overwritten next.
    %rgb_quad = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %rgba = insertelement <4 x float> %rgb_quad, float 1.0, i32 3
    %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %rgba) nounwind readnone
    ret <4 x i8> %packed
}

; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
;
; Builds the vector (r, g, b, 1.0) and forwards to the float4 overload.
define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
    %with_r = insertelement <4 x float> undef, float %r, i32 0
    %with_rg = insertelement <4 x float> %with_r, float %g, i32 1
    %with_rgb = insertelement <4 x float> %with_rg, float %b, i32 2
    %rgba = insertelement <4 x float> %with_rgb, float 1.0, i32 3
    %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %rgba) nounwind readnone
    ret <4 x i8> %packed
}

; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
;
; Builds the vector (r, g, b, a) and forwards to the float4 overload.
define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnone {
    %with_r = insertelement <4 x float> undef, float %r, i32 0
    %with_rg = insertelement <4 x float> %with_r, float %g, i32 1
    %with_rgb = insertelement <4 x float> %with_rg, float %b, i32 2
    %rgba = insertelement <4 x float> %with_rgb, float %a, i32 3
    %packed = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %rgba) nounwind readnone
    ret <4 x i8> %packed
}
