omxVCM4P10_PredictIntra_4x4_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 * omxVCM4P10_PredictIntra_4x4 -- 4x4 intra prediction, ARM (A32) + NEON.
 *
 * Syntax : GNU as, unified ARM mnemonics, ARM state (.arm).
 * ABI    : AAPCS (r4-r12, lr and d8-d12 are saved and restored here).
 *
 * Register / stack interface as used by the code below. The parameter
 * names in brackets are the ones the OpenMAX DL API is presumed to use;
 * confirm against the public header.
 *
 *   r0        base of the left-neighbour column        [pSrcLeft]
 *   r1        base of the above-neighbour row          [pSrcAbove]
 *   r2        address of the above-left corner byte    [pSrcAboveLeft]
 *   r3        destination of the 4x4 block             [pDst]
 *   [sp,#0]   byte stride between left samples         [leftStep]
 *   [sp,#4]   byte stride between destination rows     [dstStep]
 *   [sp,#8]   prediction mode, index into the table    [predMode]
 *   [sp,#12]  neighbour availability bit mask          [availability]
 *
 * Availability bits tested by the code:
 *   0x01  gates reads of the 4 bytes at r1 (DC mode)
 *   0x02  gates reads of the left column through r0 (DC mode)
 *   0x40  selects an 8-byte read at r1 instead of 4 bytes padded with
 *         the last byte (DIAG_DL and VL modes)
 *
 * Returns (r0): 0 on success, OMX_STS_BAD_ARG_ERR when the mode index is
 * outside 0..PRED_MODE_MAX.
 *
 * The three-tap filter used throughout is the pair
 *     VHADD.U8  t, a, c        t = (a + c) >> 1
 *     VRHADD.U8 t, t, b        t = (t + b + 1) >> 1
 * which equals (a + 2*b + c + 2) >> 2 for unsigned bytes.
 */

    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon

    /* Highest valid index into armVCM4P10_pSwitchTable4x4 (9 entries). */
    .equ    PRED_MODE_MAX, 8
    /*
     * Status returned for an out-of-range mode. -5 is the value of
     * OMX_Sts_BadArgErr in the OpenMAX DL headers -- NOTE(review): taken
     * from the public API, not from this file; confirm against omxtypes.h.
     */
    .equ    OMX_STS_BAD_ARG_ERR, -5

    .text
    .align 4

/*
 * Jump table, one word per mode. Each entry is the distance from the
 * value pc reads as at P0 (P0+8 in ARM state) to the handler, so that
 * "ADD pc, r8" at P0 lands on the handler.
 */
armVCM4P10_pSwitchTable4x4:
    .word OMX_VC_4x4_VERT-(P0+8),     OMX_VC_4x4_HOR-(P0+8)
    .word OMX_VC_4x4_DC-(P0+8),       OMX_VC_4x4_DIAG_DL-(P0+8)
    .word OMX_VC_4x4_DIAG_DR-(P0+8),  OMX_VC_4x4_VR-(P0+8)
    .word OMX_VC_4x4_HD-(P0+8),       OMX_VC_4x4_VL-(P0+8)
    .word OMX_VC_4x4_HU-(P0+8)

    .global omxVCM4P10_PredictIntra_4x4
    /* Mark the symbol as a function so linkers can apply ARM/Thumb
     * interworking to calls that target it. */
    .type   omxVCM4P10_PredictIntra_4x4, %function
    .func   omxVCM4P10_PredictIntra_4x4
omxVCM4P10_PredictIntra_4x4:
    PUSH     {r4-r12,lr}                @ 10 core registers = 40 bytes
    VPUSH    {d8-d12}                   @  5 D registers    = 40 bytes
    ADR      r8, armVCM4P10_pSwitchTable4x4
    /* Incoming stack arguments now start at sp + 0x50 (40 + 40). */
    LDRD     r6,r7,[sp,#0x58]           @ r6 = mode index, r7 = availability
    LDRD     r4,r5,[sp,#0x50]           @ r4 = left stride, r5 = dst stride
    /* Unsigned compare: rejects both too-large and negative indices
     * before they can index past the table and reach pc. */
    CMP      r6,#PRED_MODE_MAX
    BHI      .Lbad_pred_mode
    LDR      r8,[r8,r6,LSL #2]          @ r8 = handler - (P0+8)
P0: ADD      pc, r8                     @ pc reads as P0+8 here

/* ---- mode 1: every row is one left sample replicated ---------------- */
OMX_VC_4x4_HOR:
    ADD      r9,r0,r4                   @ r9  -> left sample 1
    ADD      r10,r4,r4                  @ r10 = 2 * left stride
    VLD1.8   {d0[]},[r0],r10            @ left 0 broadcast to all lanes
    VLD1.8   {d1[]},[r9],r10            @ left 1
    VLD1.8   {d2[]},[r0]                @ left 2
    VLD1.8   {d3[]},[r9]                @ left 3
    ADD      r11,r3,r5                  @ r11 -> dst row 1
    ADD      r12,r5,r5                  @ r12 = 2 * dst stride
    VST1.32  {d0[0]},[r3],r12           @ row 0
    VST1.32  {d1[0]},[r11],r12          @ row 1
    VST1.32  {d2[0]},[r3]               @ row 2
    VST1.32  {d3[0]},[r11]              @ row 3
    B        .Lreturn_ok

/* ---- mode 0: every row is a copy of the 4 bytes at r1 --------------- */
OMX_VC_4x4_VERT:
    VLD1.32  {d0[0]},[r1]
    ADD      r11,r3,r5                  @ r11 -> dst row 1
    ADD      r12,r5,r5                  @ r12 = 2 * dst stride
/*
 * Shared tail: store the low word of d0 to all four rows.
 * Expects r3 -> row 0, r11 -> row 1, r12 = 2 * dst stride.
 */
.Lstore_d0_to_all_rows:
    VST1.32  {d0[0]},[r3],r12
    VST1.32  {d0[0]},[r11],r12
    VST1.32  {d0[0]},[r3]
    VST1.32  {d0[0]},[r11]
    B        .Lreturn_ok

/* ---- mode 2: fill with the rounded mean of the available edges ------ */
OMX_VC_4x4_DC:
    TST      r7,#2
    BEQ      .Ldc_no_left
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d0[0]},[r0],r10           @ left 0..3 -> d0 bytes 0..3
    VLD1.8   {d0[1]},[r9],r10
    VLD1.8   {d0[2]},[r0]
    VLD1.8   {d0[3]},[r9]
    TST      r7,#1
    BEQ      .Ldc_left_only
    /* Left and above: (sum of 8 samples + 4) >> 3. */
    VLD1.32  {d0[1]},[r1]               @ above 0..3 -> d0 bytes 4..7
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VPADDL.U32 d1,d1
    VRSHR.U64 d1,d1,#3
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]
    B        .Lstore_d0_to_all_rows
.Ldc_left_only:
    /* Only d0 bytes 0..3 were loaded; only the low 32-bit lane of the
     * pairwise sum is used: (sum of 4 samples + 2) >> 2. */
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VRSHR.U32 d1,d1,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]
    B        .Lstore_d0_to_all_rows
.Ldc_no_left:
    TST      r7,#1
    BEQ      .Ldc_no_neighbours
    /* Above only: (sum of 4 samples + 2) >> 2. */
    VLD1.32  {d0[0]},[r1]
    VPADDL.U8 d1,d0
    VPADDL.U16 d1,d1
    VRSHR.U32 d1,d1,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VDUP.8   d0,d1[0]
    B        .Lstore_d0_to_all_rows
.Ldc_no_neighbours:
    VMOV.I8  d0,#0x80                   @ neither edge flagged: fill 0x80
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    B        .Lstore_d0_to_all_rows

/* ---- mode 3: three-tap filter along the above row, shifted per row -- */
OMX_VC_4x4_DIAG_DL:
    TST      r7,#0x40
    BEQ      .Ldl_pad_above
    VLD1.8   {d3},[r1]                  @ 8 bytes at r1
    VDUP.8   d2,d3[7]                   @ pad with the last byte
    VEXT.8   d4,d3,d2,#1
    VEXT.8   d5,d3,d2,#2
    B        .Ldl_filter
.Ldl_pad_above:
    /* Only 4 bytes are read; the fourth one stands in for bytes 4..7. */
    VLD1.32  {d0[1]},[r1]
    VDUP.8   d2,d0[7]
    VEXT.8   d3,d0,d2,#4
    VEXT.8   d4,d0,d2,#5
    VEXT.8   d5,d0,d2,#6
.Ldl_filter:
    VHADD.U8 d6,d3,d5
    VRHADD.U8 d6,d6,d4                  @ d6 = (d3 + 2*d4 + d5 + 2) >> 2
    VST1.32  {d6[0]},[r3],r5            @ row 0 = d6 bytes 0..3
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3],r5            @ row 1 = bytes 1..4
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3],r5            @ row 2 = bytes 2..5
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3]               @ row 3 = bytes 3..6
    B        .Lreturn_ok

/* ---- mode 4: filter over left column, corner and above row ---------- */
OMX_VC_4x4_DIAG_DR:
    VLD1.32  {d0[0]},[r1]               @ above 0..3
    VLD1.8   {d1[7]},[r2]               @ corner byte
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    ADD      r1,r3,r5                   @ r1 -> dst row 1 (r1 reused)
    VLD1.8   {d1[6]},[r0],r10           @ left 0
    VLD1.8   {d1[5]},[r9],r10           @ left 1
    VLD1.8   {d1[4]},[r0]               @ left 2
    VLD1.8   {d1[3]},[r9]               @ left 3
    VEXT.8   d3,d1,d0,#3
    ADD      r4,r1,r5                   @ r4 -> dst row 2 (r4 reused)
    VEXT.8   d4,d1,d0,#4
    ADD      r6,r4,r5                   @ r6 -> dst row 3 (r6 reused)
    VEXT.8   d5,d1,d0,#5
    VHADD.U8 d6,d3,d5
    VRHADD.U8 d6,d6,d4                  @ d6 = (d3 + 2*d4 + d5 + 2) >> 2
    VST1.32  {d6[0]},[r6]               @ rows are written bottom-up
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r4]
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r1]
    VEXT.8   d6,d6,d6,#1
    VST1.32  {d6[0]},[r3]
    B        .Lreturn_ok

/* ---- mode 5 ---------------------------------------------------------- */
OMX_VC_4x4_VR:
    VLD1.32  {d0[0]},[r1]               @ above 0..3
    VLD1.8   {d0[7]},[r2]               @ corner byte
    VLD1.8   {d1[7]},[r0],r4            @ left 0
    VLD1.8   {d2[7]},[r0],r4            @ left 1
    VLD1.8   {d1[6]},[r0]               @ left 2
    VEXT.8   d12,d0,d0,#7               @ d12 = corner, above 0..3, ...
    VEXT.8   d3,d1,d12,#6
    VEXT.8   d4,d2,d12,#7
    VEXT.8   d5,d1,d0,#7
    VEXT.8   d6,d2,d0,#7
    VEXT.8   d11,d1,d12,#7
    VHADD.U8 d8,d6,d12
    VRHADD.U8 d8,d8,d11                 @ d8 = (d6 + 2*d11 + d12 + 2) >> 2
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  @ d7 = (d3 + 2*d4 + d5 + 2) >> 2
    VEXT.8   d10,d8,d8,#1
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VEXT.8   d9,d7,d7,#1
    VST1.32  {d10[0]},[r3],r12          @ row 0
    VST1.32  {d9[0]},[r11],r12          @ row 1
    VST1.32  {d8[0]},[r3],r12           @ row 2
    VST1.32  {d7[0]},[r11]              @ row 3
    B        .Lreturn_ok

/* ---- mode 6 ---------------------------------------------------------- */
OMX_VC_4x4_HD:
    /* NOTE(review): this reads a full 8 bytes at r1 without testing
     * any availability bit. */
    VLD1.8   {d0},[r1]
    VLD1.8   {d1[7]},[r2]               @ corner byte
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d1[6]},[r0],r10           @ left 0
    VLD1.8   {d1[5]},[r9],r10           @ left 1
    VLD1.8   {d1[4]},[r0]               @ left 2
    VLD1.8   {d1[3]},[r9]               @ left 3
    VEXT.8   d3,d1,d0,#3
    VEXT.8   d4,d1,d0,#2
    VEXT.8   d5,d1,d0,#1
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  @ d7 = (d3 + 2*d4 + d5 + 2) >> 2
    VRHADD.U8 d8,d4,d3                  @ d8 = (d3 + d4 + 1) >> 1
    VSHL.I64 d8,d8,#24
    VSHL.I64 d6,d7,#16
    VZIP.8   d8,d6                      @ interleave the two results
    VEXT.8   d7,d7,d7,#6
    VEXT.8   d8,d6,d7,#2
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VST1.32  {d8[1]},[r3],r12           @ row 0
    VST1.32  {d6[1]},[r11],r12          @ row 1
    VST1.32  {d8[0]},[r3]               @ row 2
    VST1.32  {d6[0]},[r11]              @ row 3
    B        .Lreturn_ok

/* ---- mode 7 ---------------------------------------------------------- */
OMX_VC_4x4_VL:
    TST      r7,#0x40
    BEQ      .Lvl_pad_above
    VLD1.8   {d3},[r1]                  @ 8 bytes at r1
    VEXT.8   d4,d3,d3,#1
    VEXT.8   d5,d4,d4,#1
    B        .Lvl_filter
.Lvl_pad_above:
    /* Only 4 bytes are read; the fourth one stands in for bytes 4..7. */
    VLD1.32  {d0[1]},[r1]
    VDUP.8   d2,d0[7]
    VEXT.8   d3,d0,d2,#4
    VEXT.8   d4,d0,d2,#5
    VEXT.8   d5,d0,d2,#6
.Lvl_filter:
    VRHADD.U8 d7,d4,d3                  @ d7  = (d3 + d4 + 1) >> 1
    VHADD.U8 d10,d3,d5
    VRHADD.U8 d10,d10,d4                @ d10 = (d3 + 2*d4 + d5 + 2) >> 2
    VEXT.8   d8,d7,d7,#1
    ADD      r11,r3,r5
    ADD      r12,r5,r5
    VEXT.8   d9,d10,d8,#1
    VST1.32  {d7[0]},[r3],r12           @ row 0
    VST1.32  {d10[0]},[r11],r12         @ row 1
    VST1.32  {d8[0]},[r3]               @ row 2
    VST1.32  {d9[0]},[r11]              @ row 3
    B        .Lreturn_ok

/* ---- mode 8: left column only, padded with the last left sample ----- */
OMX_VC_4x4_HU:
    ADD      r9,r0,r4
    ADD      r10,r4,r4
    VLD1.8   {d1[4]},[r0],r10           @ left 0
    VLD1.8   {d1[5]},[r9],r10           @ left 1
    VLD1.8   {d1[6]},[r0]               @ left 2
    VLD1.8   {d1[7]},[r9]               @ left 3
    VDUP.8   d2,d1[7]                   @ pad with left 3
    VEXT.8   d3,d1,d2,#4
    VEXT.8   d4,d1,d2,#5
    VEXT.8   d5,d1,d2,#6
    VHADD.U8 d7,d3,d5
    VRHADD.U8 d7,d7,d4                  @ d7 = (d3 + 2*d4 + d5 + 2) >> 2
    VRHADD.U8 d8,d4,d3                  @ d8 = (d3 + d4 + 1) >> 1
    VZIP.8   d8,d7                      @ interleave the two results
    VST1.32  {d8[0]},[r3],r5            @ row 0
    VEXT.8   d8,d8,d8,#2
    VST1.32  {d8[0]},[r3],r5            @ row 1
    VEXT.8   d8,d8,d8,#2
    VST1.32  {d8[0]},[r3],r5            @ row 2
    VST1.32  {d7[0]},[r3]               @ row 3

/* ---- common exits ----------------------------------------------------- */
.Lreturn_ok:
    MOV      r0,#0                      @ success status
.Lrestore_and_return:
    VPOP     {d8-d12}
    POP      {r4-r12,pc}

.Lbad_pred_mode:
    /* Nothing has been written to the destination at this point. */
    MOV      r0,#OMX_STS_BAD_ARG_ERR
    B        .Lrestore_and_return

    .size   omxVCM4P10_PredictIntra_4x4, .-omxVCM4P10_PredictIntra_4x4
    .endfunc

    .end