omxVCM4P10_PredictIntra_16x16_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
/*
 *
 */

    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon

    .text
    .align 4
;//-------------------------------------------------------
;// This table implements a C-style switch in asm by the
;// method of two levels of indexing: the mode number
;// selects a word, and that word is an offset which is
;// added to pc at label P0.
;//
;// Each entry is (handler - (P0+8)) because in ARM state
;// pc reads as the address of the current instruction + 8.
;// Entry order (index 0..3): VERT, HOR, DC, PLANE.
;//-------------------------------------------------------

armVCM4P10_pIndexTable16x16:
    .word  OMX_VC_16X16_VERT-(P0+8),  OMX_VC_16X16_HOR-(P0+8)
    .word  OMX_VC_16X16_DC-(P0+8),    OMX_VC_16X16_PLANE-(P0+8)


;//-------------------------------------------------------
;// Halfword constants used only by the PLANE handler,
;// read as three consecutive groups of eight:
;//   row 0: weights for the gradient sums (7,6,5,4,3,2,1,8)
;//   row 1: column indices 0..7
;//   row 2: column indices 8..15
;//-------------------------------------------------------
armVCM4P10_MultiplierTable16x16:
    .hword   7,  6,  5,  4,  3,  2,  1,  8
    .hword   0,  1,  2,  3,  4,  5,  6,  7
    .hword   8,  9, 10, 11, 12, 13, 14, 15


;//-------------------------------------------------------
;// omxVCM4P10_PredictIntra_16x16
;//
;// Fills a 16x16 block of bytes at r3 from neighbouring
;// samples, using one of four methods selected by a mode
;// index.
;//
;// Syntax/target: GNU as, ARM (A32) state, NEON.
;// Arguments are taken from r0-r3 and then from the stack.
;// The names below are descriptive only.
;// NOTE(review): confirm names against the C prototype.
;//
;// In:
;//   r0        = left column; one byte is read per row and the
;//               pointer advances by leftStep between rows
;//   r1        = row above; 16 contiguous bytes
;//   r2        = above-left sample; one byte, read only by PLANE
;//   r3        = destination; 16 bytes written per row, rows are
;//               dstStep bytes apart
;//   [sp,#0]   = leftStep     (loaded into r4)
;//   [sp,#4]   = dstStep      (loaded into r5)
;//   [sp,#8]   = mode index   (loaded into r6; 0=VERT 1=HOR
;//                             2=DC 3=PLANE, per the index table)
;//   [sp,#12]  = availability (loaded into r7; tested only by DC:
;//                             bit 1 = use left column,
;//                             bit 0 = use row above)
;//   (stack offsets are relative to sp on entry; inside the
;//    function they are +0x68 because 40 bytes of core registers
;//    and 64 bytes of NEON registers have been pushed)
;//
;// Out:
;//   r0 = 0 on every path.
;//
;// Preserved: r4-r12 and d8-d15 (pushed on entry, popped on exit).
;// Modified:  r0-r3, flags, d0-d7, d16-d27.
;//
;// NOTE(review): the mode index is used unchecked to index the
;// four-entry table above. A value outside 0..3 loads a word from
;// past the table and branches to wherever it points. Callers
;// must guarantee the range; confirm this is validated upstream.
;//-------------------------------------------------------
    .global omxVCM4P10_PredictIntra_16x16
    ;// Mark the symbol as a function so the linker can apply
    ;// ARM/Thumb interworking to calls that reach it and so
    ;// debuggers/profilers can attribute the code.
    .type   omxVCM4P10_PredictIntra_16x16, %function
    .func   omxVCM4P10_PredictIntra_16x16
omxVCM4P10_PredictIntra_16x16:
    PUSH     {r4-r12,lr}                ;// 10 words = 0x28 bytes
    VPUSH    {d8-d15}                   ;// 8 dwords = 0x40 bytes
    ADR      r9, armVCM4P10_pIndexTable16x16
    LDR      r6,[sp,#0x70]              ;// r6  = mode index (3rd stack arg)
    LDR      r4,[sp,#0x68]              ;// r4  = leftStep   (1st stack arg)
    LDR      r5,[sp,#0x6c]              ;// r5  = dstStep    (2nd stack arg)
    LDR      r7,[sp,#0x74]              ;// r7  = availability (4th stack arg)
    MOV      r12,#0x10                  ;// r12 = 16, row counter for HOR and PLANE
    LDR      r9,[r9,r6,LSL #2]          ;// r9  = handler offset relative to P0+8
P0: ADD      pc,r9                      ;// pc reads as P0+8, so this lands on the handler

;//-------------------------------------------------------
;// Mode 0: every output row is a copy of the 16 bytes at r1.
;// r3 walks the even rows and r8 the odd rows, each stepping
;// by 2*dstStep.
;//-------------------------------------------------------
OMX_VC_16X16_VERT:
    VLD1.8   {d0,d1},[r1]               ;// q0 = row above
    ADD      r8,r3,r5                   ;// r8  = address of row 1
    ADD      r10,r5,r5                  ;// r10 = 2*dstStep
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3]               ;// rows 14 and 15: no post-increment needed
    VST1.8   {d0,d1},[r8]
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

;//-------------------------------------------------------
;// Mode 1: output row y is 16 copies of the y-th left byte.
;// r0/r3 handle even rows, r8/r11 handle odd rows; both
;// steps are doubled. Each loop pass emits 8 rows, so the
;// loop runs twice (r12: 16 -> 8 -> 0).
;//-------------------------------------------------------
OMX_VC_16X16_HOR:
    ADD      r8,r0,r4                   ;// r8  = address of left byte for row 1
    ADD      r4,r4,r4                   ;// r4  = 2*leftStep
    ADD      r11,r3,r5                  ;// r11 = address of output row 1
    ADD      r5,r5,r5                   ;// r5  = 2*dstStep
L0x8c:
    VLD1.8   {d2[],d3[]},[r0],r4        ;// q1 = even-row left byte in all 16 lanes
    VLD1.8   {d0[],d1[]},[r8],r4        ;// q0 = odd-row left byte in all 16 lanes
    SUBS     r12,r12,#8                 ;// sets Z for the BNE below; nothing in between touches flags
    VST1.8   {d2,d3},[r3],r5
    VST1.8   {d0,d1},[r11],r5
    VLD1.8   {d2[],d3[]},[r0],r4
    VLD1.8   {d0[],d1[]},[r8],r4
    VST1.8   {d2,d3},[r3],r5
    VST1.8   {d0,d1},[r11],r5
    VLD1.8   {d2[],d3[]},[r0],r4
    VLD1.8   {d0[],d1[]},[r8],r4
    VST1.8   {d2,d3},[r3],r5
    VST1.8   {d0,d1},[r11],r5
    VLD1.8   {d2[],d3[]},[r0],r4
    VLD1.8   {d0[],d1[]},[r8],r4
    VST1.8   {d2,d3},[r3],r5
    VST1.8   {d0,d1},[r11],r5
    BNE      L0x8c
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

;//-------------------------------------------------------
;// Mode 2: every output byte is one value derived from the
;// neighbours that the availability word (r7) says exist.
;//   r11 = number of neighbour groups summed (0, 1 or 2)
;//   d6  = 64-bit sum of the 16 left bytes   (if bit 1 set)
;//   d7  = 64-bit sum of the 16 above bytes  (if bit 0 set)
;//   d8  = result:
;//           one group : (sum + 8)  >> 4
;//           both      : (d6 + d7 + 16) >> 5
;//           none      : overridden with 0x80
;//-------------------------------------------------------
OMX_VC_16X16_DC:
    MOV      r11,#0
    TST      r7,#2                      ;// left column available?
    BEQ      L0x14c
    ADD      r8,r0,r4                   ;// r8  = odd-row left pointer
    ADD      r10,r4,r4                  ;// r10 = 2*leftStep
    ;// Gather the 16 left bytes into q1, one lane at a time.
    VLD1.8   {d2[0]},[r0],r10
    VLD1.8   {d2[1]},[r8],r10
    VLD1.8   {d2[2]},[r0],r10
    VLD1.8   {d2[3]},[r8],r10
    VLD1.8   {d2[4]},[r0],r10
    VLD1.8   {d2[5]},[r8],r10
    VLD1.8   {d2[6]},[r0],r10
    VLD1.8   {d2[7]},[r8],r10
    VLD1.8   {d3[0]},[r0],r10
    VLD1.8   {d3[1]},[r8],r10
    VLD1.8   {d3[2]},[r0],r10
    VLD1.8   {d3[3]},[r8],r10
    VLD1.8   {d3[4]},[r0],r10
    VLD1.8   {d3[5]},[r8],r10
    VLD1.8   {d3[6]},[r0],r10
    VLD1.8   {d3[7]},[r8]
    VPADDL.U8 q0,q1                     ;// 16 bytes -> 8 halfword pair sums
    ADD      r11,r11,#1
    VPADD.I16 d0,d0,d1                  ;// -> 4 halfwords
    VPADDL.U16 d0,d0                    ;// -> 2 words
    VPADDL.U32 d6,d0                    ;// d6 = total of the left column
    VRSHR.U64 d8,d6,#4                  ;// d8 = (left sum + 8) >> 4
L0x14c:
    TST      r7,#1                      ;// row above available?
    BEQ      L0x170
    VLD1.8   {d0,d1},[r1]
    ADD      r11,r11,#1
    VPADDL.U8 q0,q0                     ;// same reduction as for the left column
    VPADD.I16 d0,d0,d1
    VPADDL.U16 d0,d0
    VPADDL.U32 d7,d0                    ;// d7 = total of the row above
    VRSHR.U64 d8,d7,#4                  ;// d8 = (above sum + 8) >> 4
L0x170:
    CMP      r11,#2
    BNE      L0x180
    VADD.I64 d8,d7,d6                   ;// both groups present: average all 32 samples
    VRSHR.U64 d8,d8,#5                  ;// d8 = (left + above + 16) >> 5
L0x180:
    VDUP.8   q3,d8[0]                   ;// q3 = result byte in all lanes (reuses d6/d7, sums no longer needed)
    CMP      r11,#0
    ADD      r8,r3,r5                   ;// r8  = address of output row 1
    ADD      r10,r5,r5                  ;// r10 = 2*dstStep
    BNE      L0x198                     ;// flags still from CMP r11,#0 (ADD does not set them)
    VMOV.I8  q3,#0x80                   ;// no neighbours summed: fill with 0x80
L0x198:
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10           ;// final post-increments are unused; r3/r8 are dead after this
    VST1.8   {d6,d7},[r8],r10
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

;//-------------------------------------------------------
;// Mode 3: linear gradient fitted to the border samples.
;// Notation: A[i] = byte i of the row above (r1),
;//           Lf[i] = byte i of the left column (via r0),
;//           C = the byte at r2.
;//
;//   H = sum of w * (A[8+k]  - A[6-k])   for k = 0..6, w = k+1
;//       plus 8 * (A[15] - C)
;//   V = the same expression over Lf
;//   b = (5*H + 32) >> 6        c = (5*V + 32) >> 6
;//   a = 16 * (A[15] + Lf[15])
;//   out[y][x] = saturate_u8((a + b*(x-7) + c*(y-7) + 16) >> 5)
;//
;// Row 0 is built in q0 (columns 0..7) and q1 (columns 8..15)
;// as 16-bit values; each later row adds c to every lane.
;//-------------------------------------------------------
OMX_VC_16X16_PLANE:
    ADR      r9, armVCM4P10_MultiplierTable16x16
    VLD1.8   {d0,d1},[r1]               ;// q0 = A[0..15]
    VLD1.8   {d4[0]},[r2]               ;// d4 lane 0 = C (other lanes of d4 are not meaningful)
    ADD      r8,r0,r4                   ;// r8  = odd-row left pointer
    ADD      r10,r4,r4                  ;// r10 = 2*leftStep
    ;// Gather Lf[0..15] into q1, one lane at a time.
    VLD1.8   {d2[0]},[r0],r10
    VLD1.8   {d2[1]},[r8],r10
    VLD1.8   {d2[2]},[r0],r10
    VLD1.8   {d2[3]},[r8],r10
    VLD1.8   {d2[4]},[r0],r10
    VLD1.8   {d2[5]},[r8],r10
    VLD1.8   {d2[6]},[r0],r10
    VLD1.8   {d2[7]},[r8],r10
    VLD1.8   {d3[0]},[r0],r10
    VLD1.8   {d3[1]},[r8],r10
    VLD1.8   {d3[2]},[r0],r10
    VLD1.8   {d3[3]},[r8],r10
    VLD1.8   {d3[4]},[r0],r10
    VLD1.8   {d3[5]},[r8],r10
    VLD1.8   {d3[6]},[r0],r10
    VLD1.8   {d3[7]},[r8]
    ;// Horizontal differences from the row above.
    VREV64.8 d5,d1                      ;// d5 = A[15],A[14],...,A[8]
    VSUBL.U8 q3,d5,d4                   ;// only lane 0 is used later: A[15]-C
    VSHR.U64 d5,d5,#8                   ;// drop A[15]: d5 = A[14],...,A[8],0
    VSUBL.U8 q4,d5,d0                   ;// lane k = A[14-k]-A[k] (lane 7 is discarded below)
    VSHL.I64 d9,d9,#16                  ;// push lanes 4..6 up one slot, dropping lane 7
    VEXT.8   d9,d9,d6,#2                ;// d9 = lanes 4,5,6 followed by A[15]-C
    ;// Vertical differences from the left column, same steps.
    VREV64.8 d12,d3
    VSUBL.U8 q7,d12,d4                  ;// only lane 0 is used later: Lf[15]-C
    VSHR.U64 d12,d12,#8
    VSUBL.U8 q8,d12,d2
    VLD1.16  {d20,d21},[r9]!            ;// q10 = weights 7,6,5,4,3,2,1,8
    VSHL.I64 d17,d17,#16
    VEXT.8   d17,d17,d14,#2
    ;// Weighted sums: q11 accumulates H, q12 accumulates V.
    VMULL.S16 q11,d8,d20
    VMULL.S16 q12,d16,d20
    VMLAL.S16 q11,d9,d21
    VMLAL.S16 q12,d17,d21
    VPADD.I32 d22,d23,d22               ;// fold the four H partials to two
    VPADD.I32 d23,d25,d24               ;// fold the four V partials to two
    VPADDL.S32 q11,q11                  ;// d22 = H, d23 = V (64-bit)
    VSHL.I64 q12,q11,#2
    VADD.I64 q11,q11,q12                ;// 5*H, 5*V
    VRSHR.S64 q11,q11,#6                ;// d22 = b, d23 = c
    VSHL.I64 q12,q11,#3
    VSUB.I64 q12,q12,q11                ;// d24 = 7*b, d25 = 7*c
    VLD1.16  {d20,d21},[r9]!            ;// q10 = 0..7
    VDUP.16  q6,d22[0]                  ;// q6 = b in every lane
    VDUP.16  q7,d23[0]                  ;// q7 = c in every lane (row increment)
    VADDL.U8 q11,d1,d3                  ;// lane 7 = A[15]+Lf[15]
    VSHL.I16 q11,q11,#4
    VDUP.16  q11,d23[3]                 ;// q11 = a in every lane
    VADD.I64 d1,d24,d25                 ;// 7*(b+c)
    VLD1.16  {d24,d25},[r9]             ;// q12 = 8..15
    VDUP.16  q13,d1[0]
    VSUB.I16 q13,q11,q13                ;// q13 = a - 7*(b+c)
    VMUL.I16 q5,q6,q10                  ;// b * (0..7)
    VMUL.I16 q6,q6,q12                  ;// b * (8..15)
    VADD.I16 q0,q5,q13                  ;// row 0, columns 0..7
    VADD.I16 q1,q6,q13                  ;// row 0, columns 8..15
L0x2d4:
    VQRSHRUN.S16 d6,q0,#5               ;// round, shift by 5, saturate to unsigned bytes
    VQRSHRUN.S16 d7,q1,#5
    SUBS     r12,r12,#1                 ;// 16 rows; NEON ops below leave the flags alone
    VST1.8   {d6,d7},[r3],r5
    VADD.I16 q0,q0,q7                   ;// next row: add c
    VADD.I16 q1,q1,q7
    BNE      L0x2d4
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}
    .size    omxVCM4P10_PredictIntra_16x16, .-omxVCM4P10_PredictIntra_16x16
    .endfunc

    .end
