/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * Syntax: GNU as, ARM (A32) state, NEON.  Only C-style comments are used so
 * the file reads the same with or without the C preprocessor.
 */

    .eabi_attribute 24, 1           /* Tag_ABI_align_needed    */
    .eabi_attribute 25, 1           /* Tag_ABI_align_preserved */

    .arm
    .fpu neon
    .text

/*
 * Frame layout after the prologue (offsets from sp):
 *   0x00-0x0f  spill area for r0-r3 (addressed through r11)
 *   0x10-0x4f  d8-d15  (VPUSH, 8 * 8 bytes)
 *   0x50-0x77  r4-r12, lr (PUSH, 10 * 4 bytes)
 *   0x78-...   stack arguments of the caller
 */
    .equ     IL_SCRATCH_SIZE, 0x10
    .equ     IL_ARG_DX,       0x78  /* 1st stack word */
    .equ     IL_ARG_DY,       0x7c  /* 2nd stack word */
    .equ     IL_ARG_WIDTH,    0x80  /* 3rd stack word */
    .equ     IL_ARG_HEIGHT,   0x84  /* 4th stack word */

/*-----------------------------------------------------------------------
 * omxVCM4P10_InterpolateLuma
 *
 * Walks a width x height region in 4x4 blocks and, for every block, runs
 * one of 16 code paths selected by (dx + 4 * dy), writing 4 rows of 4
 * bytes to the destination.
 *
 * Presumably matches the OpenMAX DL prototype
 *   OMXResult omxVCM4P10_InterpolateLuma(const OMX_U8 *pSrc,
 *       OMX_S32 srcStep, OMX_U8 *pDst, OMX_S32 dstStep,
 *       OMX_S32 dx, OMX_S32 dy, OMXSize roi);
 * (TODO confirm against the project header; the names dx/dy/width/height
 * below follow that prototype.)
 *
 * In:   r0            source pointer (top-left of the current block)
 *       r1            source row stride in bytes
 *       r2            destination pointer
 *       r3            destination row stride in bytes
 *       [stack + 0]   dx
 *       [stack + 4]   dy
 *       [stack + 8]   width   (consumed 4 columns per block)
 *       [stack + 12]  height  (consumed 4 rows per band)
 * Out:  r0 = 0
 *
 * Register roles inside the loops:
 *       r4   rows remaining       r5   columns remaining in this band
 *       r6   selector dx + 4*dy   r8   copy of r0 across the first helper
 *       r11  address of the r0-r3 spill area (re-derived from sp after
 *            every case)
 *       r12  destination pointer for rows 2 and 3
 *       d30  16-bit lanes = 5, d31 16-bit lanes = 20; neither is read in
 *            this file -- presumably filter taps for the helpers (confirm).
 *
 * The three armVCM4P10_InterpolateLuma_*_unsafe helpers are defined
 * elsewhere.  This code assumes they leave r1-r6 and r8 intact and deliver
 * results in the NEON registers consumed after each call (verify against
 * the helper sources).
 *
 * NOTE(review): dx and dy are not range checked.  A selector outside
 * 0..15 indexes past the branch table.
 * NOTE(review): both loops test after the body, so one block is processed
 * even when width or height is <= 0.  The column rewind at the end of a
 * band subtracts the raw width, which only returns to the band start when
 * width is a multiple of 4.
 *-----------------------------------------------------------------------*/
    .global omxVCM4P10_InterpolateLuma
    /* Typed as a function so the linker can apply ARM/Thumb interworking. */
    .type   omxVCM4P10_InterpolateLuma, %function
omxVCM4P10_InterpolateLuma:
    PUSH     {r4-r12,lr}
    VPUSH    {d8-d15}
    SUB      sp,sp,#IL_SCRATCH_SIZE
    LDR      r6,[sp,#IL_ARG_DX]
    LDR      r7,[sp,#IL_ARG_DY]
    LDR      r5,[sp,#IL_ARG_WIDTH]
    LDR      r4,[sp,#IL_ARG_HEIGHT]
    ADD      r6,r6,r7,LSL #2        /* r6 = dx + 4 * dy */
    ADD      r11,sp,#0              /* r11 -> r0-r3 spill area */
    VMOV.I16 d31,#0x14
    VMOV.I16 d30,#0x5

.Lblock_loop:
    STM      r11,{r0-r3}            /* every case modifies r0 and/or r2 */
    /*
     * Computed branch.  In ARM state pc reads as the address of this ADD
     * plus 8, so selector 0 lands on the second B below.  The first B only
     * occupies the slot at ADD + 4 and is not the target of any selector
     * >= 0.  The table must stay directly behind the ADD, one B per entry.
     */
    ADD      pc,pc,r6,LSL #2
    B        .Lsel_dx3_dy3          /* filler slot */
    B        .Lsel_dx0_dy0          /* selector  0 */
    B        .Lsel_dx1_dy0          /* selector  1 */
    B        .Lsel_dx2_dy0          /* selector  2 */
    B        .Lsel_dx3_dy0          /* selector  3 */
    B        .Lsel_dx0_dy1          /* selector  4 */
    B        .Lsel_dx1_dy1          /* selector  5 */
    B        .Lsel_dx2_dy1          /* selector  6 */
    B        .Lsel_dx3_dy1          /* selector  7 */
    B        .Lsel_dx0_dy2          /* selector  8 */
    B        .Lsel_dx1_dy2          /* selector  9 */
    B        .Lsel_dx2_dy2          /* selector 10 */
    B        .Lsel_dx3_dy2          /* selector 11 */
    B        .Lsel_dx0_dy3          /* selector 12 */
    B        .Lsel_dx1_dy3          /* selector 13 */
    B        .Lsel_dx2_dy3          /* selector 14 */
    B        .Lsel_dx3_dy3          /* selector 15 */

    /*
     * Selector 0: plain copy.  Each VLD1.8 reads 8 bytes of a source row;
     * only the low 4 bytes (lane 0 as a 32-bit element) are stored.
     * r0/r2 walk rows 0 and 1, r12 walks rows 2 and 3.
     */
.Lsel_dx0_dy0:
    ADD      r12,r0,r1,LSL #1
    VLD1.8   {d9},[r0],r1
    VLD1.8   {d11},[r12],r1
    VLD1.8   {d10},[r0]
    VLD1.8   {d12},[r12]
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d9[0]},[r2],r3
    VST1.32  {d11[0]},[r12],r3
    VST1.32  {d10[0]},[r2]
    VST1.32  {d12[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 1: HalfHor helper called with r0 moved 2 bytes left; its
     * d22/d24/d26/d28 are rounding-averaged with d14/d16/d18/d20 and stored
     * as rows 0..3.
     */
.Lsel_dx1_dy0:
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d14
    VRHADD.U8 d26,d26,d18
    VRHADD.U8 d24,d24,d16
    VRHADD.U8 d28,d28,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /* Selector 2: HalfHor helper result stored unmodified. */
.Lsel_dx2_dy0:
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /* Selector 3: as selector 1, but averaged with d15/d17/d19/d21. */
.Lsel_dx3_dy0:
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d15
    VRHADD.U8 d26,d26,d19
    VRHADD.U8 d24,d24,d17
    VRHADD.U8 d28,d28,d21
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 4: HalfVer helper called with r0 moved 2 rows up; its
     * d0/d2/d4/d6 are rounding-averaged with d9/d10/d11/d12 and stored as
     * rows 0..3.
     */
.Lsel_dx0_dy1:
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    VRHADD.U8 d0,d0,d9
    VRHADD.U8 d4,d4,d11
    VRHADD.U8 d2,d2,d10
    VRHADD.U8 d6,d6,d12
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 5: HalfVer helper (2 rows up), then HalfHor helper (2 bytes
     * left of the original r0 kept in r8); the two results are averaged.
     */
.Lsel_dx1_dy1:
    MOV      r8,r0
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    SUB      r0,r8,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 6: HalfDiagHorVer helper (2 rows up, 2 bytes left).  q7..q10
     * are narrowed (signed 16-bit -> unsigned 8-bit, rounding shift right
     * by 5, saturating) and averaged with the helper d0/d2/d4/d6.
     */
.Lsel_dx2_dy1:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    VQRSHRUN.S16 d14,q7,#5
    VQRSHRUN.S16 d16,q8,#5
    VQRSHRUN.S16 d18,q9,#5
    VQRSHRUN.S16 d20,q10,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 7: as selector 5, but the HalfVer helper starts one byte to
     * the right.
     */
.Lsel_dx3_dy1:
    MOV      r8,r0
    ADD      r0,r0,#1
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    SUB      r0,r8,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /* Selector 8: HalfVer helper result stored unmodified. */
.Lsel_dx0_dy2:
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 9: HalfDiagVerHor helper (2 rows up, 2 bytes left).  Each of
     * q9..q12 is first realigned by 4 bytes (two 16-bit lanes) with VEXT,
     * then narrowed and averaged with the helper d0/d2/d4/d6.  The
     * narrowing order matters: d18 and d20 are overwritten only after q9
     * and q10 have been consumed.
     */
.Lsel_dx1_dy2:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    VEXT.8   d18,d18,d19,#4
    VEXT.8   d20,d20,d21,#4
    VEXT.8   d22,d22,d23,#4
    VEXT.8   d24,d24,d25,#4
    VQRSHRUN.S16 d14,q9,#5
    VQRSHRUN.S16 d16,q10,#5
    VQRSHRUN.S16 d18,q11,#5
    VQRSHRUN.S16 d20,q12,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /* Selector 10: HalfDiagHorVer helper result stored unmodified. */
.Lsel_dx2_dy2:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 11: as selector 9, but realigned by 6 bytes (three 16-bit
     * lanes).
     */
.Lsel_dx3_dy2:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    VEXT.8   d18,d18,d19,#6
    VEXT.8   d20,d20,d21,#6
    VEXT.8   d22,d22,d23,#6
    VEXT.8   d24,d24,d25,#6
    VQRSHRUN.S16 d14,q9,#5
    VQRSHRUN.S16 d16,q10,#5
    VQRSHRUN.S16 d18,q11,#5
    VQRSHRUN.S16 d20,q12,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /* Selector 12: as selector 4, but averaged with d10/d11/d12/d13. */
.Lsel_dx0_dy3:
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    VRHADD.U8 d0,d0,d10
    VRHADD.U8 d4,d4,d12
    VRHADD.U8 d2,d2,d11
    VRHADD.U8 d6,d6,d13
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 13: as selector 5, but the HalfHor helper starts one row
     * further down.
     */
.Lsel_dx1_dy3:
    MOV      r8,r0
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ADD      r0,r8,r1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /* Selector 14: as selector 6, but narrowing q8..q11 instead of q7..q10. */
.Lsel_dx2_dy3:
    SUB      r0,r0,r1,LSL #1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    VQRSHRUN.S16 d14,q8,#5
    VQRSHRUN.S16 d16,q9,#5
    VQRSHRUN.S16 d18,q10,#5
    VQRSHRUN.S16 d20,q11,#5
    VRHADD.U8 d0,d0,d14
    VRHADD.U8 d4,d4,d18
    VRHADD.U8 d2,d2,d16
    VRHADD.U8 d6,d6,d20
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d0[0]},[r2],r3
    VST1.32  {d4[0]},[r12],r3
    VST1.32  {d2[0]},[r2]
    VST1.32  {d6[0]},[r12]
    ADD      r11,sp,#0
    B        .Lblock_done

    /*
     * Selector 15: HalfVer helper one byte right, HalfHor helper one row
     * down; results averaged.  Falls through into the loop tail.
     */
.Lsel_dx3_dy3:
    MOV      r8,r0
    ADD      r0,r0,#1
    SUB      r0,r0,r1,LSL #1
    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    ADD      r0,r8,r1
    SUB      r0,r0,#2
    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    VRHADD.U8 d22,d22,d0
    VRHADD.U8 d26,d26,d4
    VRHADD.U8 d24,d24,d2
    VRHADD.U8 d28,d28,d6
    ADD      r12,r2,r3,LSL #1
    VST1.32  {d22[0]},[r2],r3
    VST1.32  {d26[0]},[r12],r3
    VST1.32  {d24[0]},[r2]
    VST1.32  {d28[0]},[r12]
    ADD      r11,sp,#0

.Lblock_done:
    LDM      r11,{r0-r3}            /* pointers/strides as at block entry */
    SUBS     r5,r5,#4               /* 4 columns consumed */
    ADD      r0,r0,#4
    ADD      r2,r2,#4
    BGT      .Lblock_loop           /* more columns in this band */
    SUBS     r4,r4,#4               /* 4 rows consumed; flags kept until BGT */
    LDR      r5,[sp,#IL_ARG_WIDTH]  /* restart the column counter */
    ADD      r11,sp,#0
    ADD      r0,r0,r1,LSL #2        /* down 4 source rows */
    ADD      r2,r2,r3,LSL #2        /* down 4 destination rows */
    SUB      r0,r0,r5               /* back to the first column */
    SUB      r2,r2,r5
    BGT      .Lblock_loop           /* more row bands */
    MOV      r0,#0                  /* return value 0 */
    ADD      sp,sp,#IL_SCRATCH_SIZE
    VPOP     {d8-d15}
    POP      {r4-r12,pc}
    .size   omxVCM4P10_InterpolateLuma, . - omxVCM4P10_InterpolateLuma

    /* Declare that this object does not need an executable stack. */
    .section .note.GNU-stack,"",%progbits

    .end
