;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Define the processor variants supported by this file

        M_VARIANTS CortexA8

;//-------------------------------------------------------
;// This table implements a C-style switch statement in
;// assembly by the method of two levels of indexing:
;// predMode indexes the table, and the word loaded from
;// the table is the address of the matching case label.
;// Entry order: VERT, HOR, DC, DIAG_DL, DIAG_DR, VR, HD,
;// VL, HU (indices 0..8).
;//-------------------------------------------------------

        M_TABLE armVCM4P10_pSwitchTable4x4
        DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR
        DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL
        DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR
        DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL
        DCD  OMX_VC_4x4_HU


        IF CortexA8

;//--------------------------------------------
;// Scratch variable
;//--------------------------------------------
return          RN 0
pTable          RN 8
pc              RN 15

;//--------------------------------------------
;// Declare input registers
;//--------------------------------------------
pSrcLeft        RN 0    ;// input pointer
pSrcAbove       RN 1    ;// input pointer
pSrcAboveLeft   RN 2    ;// input pointer
pDst            RN 3    ;// output pointer
leftStep        RN 4    ;// input variable
dstStep         RN 5    ;// input variable
predMode        RN 6    ;// input variable
availability    RN 7    ;// input variable

;// Row pointers used by DIAG_DR. They alias pSrcAbove, leftStep and
;// predMode, so they may only be written once those inputs are dead.
pDst1           RN 1
pDst2           RN 4
pDst3           RN 6

pSrcTmp         RN 9
srcStep         RN 10
pDstTmp         RN 11
dstep           RN 12

;//-------------------
;// Neon registers
;//-------------------

;// OMX_VC_4x4_VERT
dAboveU32       DN  D0.U32

;// OMX_VC_4x4_HOR
dLeftVal0       DN  D0.8
dLeftVal1       DN  D1.8
dLeftVal2       DN  D2.8
dLeftVal3       DN  D3.8
dLeftVal0U32    DN  D0.U32
dLeftVal1U32    DN  D1.U32
dLeftVal2U32    DN  D2.U32
dLeftVal3U32    DN  D3.U32

;// OMX_VC_4x4_DC
dLeftVal        DN  D0.U8
dLeftValU32     DN  D0.U32
dSumAboveLeftU16  DN  D1.U16
dSumAboveLeftU32  DN  D1.U32
dSumAboveLeftU64  DN  D1.U64
dSumAboveLeftU8   DN  D1.U8
dSum            DN  D0.U8

dSumLeftValU16  DN  D1.U16
dSumLeftValU32  DN  D1.U32
dSumLeftValU64  DN  D1.U64
dSumLeftValU8   DN  D1.U8

dAboveVal       DN  D0.U8
dSumAboveValU16 DN  D1.U16
dSumAboveValU32 DN  D1.U32
dSumAboveValU64 DN  D1.U64
dSumAboveValU8  DN  D1.U8
dConst128U8     DN  D0.U8


;//OMX_VC_4x4_DIAG_DL

dAbove          DN  D0.U8
dU7             DN  D2.U8
dU3             DN  D2.U8
dAbove0         DN  D3.U8
dAbove1         DN  D4.U8
dAbove2         DN  D5.U8
dTmp            DN  D6.U8
dTmp0           DN  D7.U8
dTmp1           DN  D8.U8
dTmp2           DN  D9.U8
dTmp3           DN  D10.U8
dTmpU32         DN  D6.U32


;//OMX_VC_4x4_DIAG_DR
dLeft           DN  D1.U8
dUL             DN  D2.U8

;//OMX_VC_4x4_VR
dLeft0          DN  D1.U8
dLeft1          DN  D2.U8
dEven0          DN  D3.U8
dEven1          DN  D4.U8
dEven2          DN  D5.U8
dOdd0           DN  D6.U8
dOdd1           DN  D11.U8
dOdd2           DN  D12.U8
dTmp3U32        DN  D10.U32
dTmp2U32        DN  D9.U32


;//OMX_VC_4x4_HD
;// (dTmpU32 is shared with DIAG_DL and is already defined above)
dTmp1U64        DN  D8.U64
dTmp0U64        DN  D7.U64
dTmpU64         DN  D6.U64
dTmp1U32        DN  D8.U32

;//OMX_VC_4x4_HU
dL3             DN  D2.U8
dLeftHU0        DN  D3.U8
dLeftHU1        DN  D4.U8
dLeftHU2        DN  D5.U8
dTmp0U32        DN  D7.U32




;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_4x4 starts
;//
;// Writes one 4x4 block of intra-predicted samples to pDst, derived from the
;// neighbouring samples according to predMode.
;//
;// Register arguments:
;//   r0  pSrcLeft       left column; consecutive samples are leftStep bytes apart
;//   r1  pSrcAbove      row above: U0..U3, plus U4..U7 when upper-right is read
;//   r2  pSrcAboveLeft  the single above-left sample (UL)
;//   r3  pDst           output block; consecutive rows are dstStep bytes apart
;// Stack arguments (4 bytes each, in this order):
;//   leftStep, dstStep, predMode, availability
;//
;// Returns: r0 = OMX_Sts_NoErr on every path.
;//
;// availability is only tested by the DC (OMX_VC_LEFT, OMX_VC_UPPER), DIAG_DL
;// and VL (OMX_VC_UPPER_RIGHT) cases; the other cases read their neighbours
;// unconditionally.
;//
;// NOTE(review): predMode is used directly as an index into
;// armVCM4P10_pSwitchTable4x4 (9 entries) with no range check, so the caller
;// must pass a value in 0..8.
;//-----------------------------------------------------------------------------------------------

        ;// Write function header
        ;// NOTE(review): r12/d12 presumably name the highest ARM/NEON registers
        ;// the prologue must preserve - confirm against armCOMM_s.h.
        M_START omxVCM4P10_PredictIntra_4x4, r12,d12

        ;// Define stack arguments
        M_ARG    LeftStep,     4
        M_ARG    DstStep,      4
        M_ARG    PredMode,     4
        M_ARG    Availability, 4


        LDR      pTable,=armVCM4P10_pSwitchTable4x4     ;// Load index table for switch case

        ;// Load argument from the stack
        M_LDRD   predMode,availability,PredMode         ;// Arg predMode & availability loaded from stack to reg
        M_LDRD   leftStep,dstStep,LeftStep              ;// Arg leftStep & dstStep loaded from stack to reg


        LDR      pc, [pTable, predMode, LSL #2]         ;// Branch to the case based on predMode


OMX_VC_4x4_HOR

        ;// Two interleaved pointers, each advancing by 2*leftStep / 2*dstStep
        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep
        ;// Load Left Edge, replicating each sample across a whole D register
        VLD1     {dLeftVal0[]},[pSrcLeft],srcStep       ;// pSrcLeft[0*leftStep]
        VLD1     {dLeftVal1[]},[pSrcTmp],srcStep        ;// pSrcLeft[1*leftStep]
        VLD1     {dLeftVal2[]},[pSrcLeft]               ;// pSrcLeft[2*leftStep]
        VLD1     {dLeftVal3[]},[pSrcTmp]                ;// pSrcLeft[3*leftStep]

        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep

        VST1     dLeftVal0U32[0],[pDst],dstep           ;// pDst[0*dstStep+x] :0<= x <= 3
        VST1     dLeftVal1U32[0],[pDstTmp],dstep        ;// pDst[1*dstStep+x] :0<= x <= 3
        VST1     dLeftVal2U32[0],[pDst]                 ;// pDst[2*dstStep+x] :0<= x <= 3
        VST1     dLeftVal3U32[0],[pDstTmp]              ;// pDst[3*dstStep+x] :0<= x <= 3

        B        ExitPredict4x4                         ;// Branch to exit code

OMX_VC_4x4_VERT

        ;// Load Upper Edge
        VLD1     dAboveU32[0],[pSrcAbove]
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep

;// Shared store: writes the low 32 bits of D0 to all four rows.
;// Expects pDstTmp = pDst + dstStep and dstep = 2*dstStep on entry.
;// The DC paths branch here with the DC value replicated in D0.
DCPredict4x4VertStore

        VST1     dAboveU32[0],[pDst],dstep
        VST1     dAboveU32[0],[pDstTmp],dstep
        VST1     dAboveU32[0],[pDst]
        VST1     dAboveU32[0],[pDstTmp]

        B        ExitPredict4x4                         ;// Branch to exit code

OMX_VC_4x4_DC


        TST      availability, #OMX_VC_LEFT
        BEQ      DCPredict4x4LeftNotAvailable

        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep
        ;// Load Left Edge into lanes 0..3
        VLD1     {dLeftVal[0]},[pSrcLeft],srcStep       ;// pSrcLeft[0*leftStep]
        VLD1     {dLeftVal[1]},[pSrcTmp],srcStep        ;// pSrcLeft[1*leftStep]
        VLD1     {dLeftVal[2]},[pSrcLeft]               ;// pSrcLeft[2*leftStep]
        VLD1     {dLeftVal[3]},[pSrcTmp]                ;// pSrcLeft[3*leftStep]

        TST      availability, #OMX_VC_UPPER
        BEQ      DCPredict4x4LeftOnlyAvailable

        ;// Load Upper Edge also, into lanes 4..7
        VLD1     dLeftValU32[1],[pSrcAbove]             ;// pSrcAbove[0 to 3]
        MOV      return, #OMX_Sts_NoErr                 ;// pSrcLeft (same register) is dead from here

        VPADDL   dSumAboveLeftU16, dLeftVal             ;// [pSrcAbove[2+3 | 0+1] | pSrcLeft[2+3 | 0+1]]
        VPADDL   dSumAboveLeftU32, dSumAboveLeftU16     ;// [pSrcAbove[2+3+0+1] | pSrcLeft[2+3+0+1]]
        VPADDL   dSumAboveLeftU64, dSumAboveLeftU32     ;// [pSrcAbove[2+3+0+1] + pSrcLeft[2+3+0+1]]
        VRSHR    dSumAboveLeftU64,dSumAboveLeftU64,#3   ;// Sum = (Sum + 4) >> 3
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VDUP     dSum,dSumAboveLeftU8[0]

        B        DCPredict4x4VertStore

DCPredict4x4LeftOnlyAvailable

        MOV      return, #OMX_Sts_NoErr                 ;// returnNoError

        ;// Lanes 4..7 of dLeftVal were never loaded; only the low 32-bit sum is used
        VPADDL   dSumLeftValU16, dLeftVal               ;// [ XX | pSrcLeft[2+3 | 0+1]]
        VPADDL   dSumLeftValU32, dSumLeftValU16         ;// [ XXXX | pSrcLeft[2+3+0+1]]

        VRSHR    dSumLeftValU32,dSumLeftValU32,#2       ;// Sum = (Sum + 2) >> 2
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VDUP     dSum,dSumLeftValU8[0]

        B        DCPredict4x4VertStore

DCPredict4x4LeftNotAvailable

        TST      availability, #OMX_VC_UPPER
        BEQ      DCPredict4x4NoneAvailable

        ;// Load Upper Edge
        VLD1     dAboveU32[0],[pSrcAbove]               ;// pSrcAbove[0 to 3]
        MOV      return, #OMX_Sts_NoErr

        VPADDL   dSumAboveValU16, dAboveVal             ;// [ XX | pSrcAbove[2+3 | 0+1]]
        VPADDL   dSumAboveValU32, dSumAboveValU16       ;// [ XXXX | pSrcAbove[2+3+0+1]]

        VRSHR    dSumAboveValU32,dSumAboveValU32,#2     ;// Sum = (Sum + 2) >> 2
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VDUP     dSum,dSumAboveValU8[0]

        B        DCPredict4x4VertStore

DCPredict4x4NoneAvailable

        VMOV     dConst128U8,#0x80                      ;// 0x8080808080808080 if(count == 0)
        MOV      return, #OMX_Sts_NoErr

        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        B        DCPredict4x4VertStore



OMX_VC_4x4_DIAG_DL

        TST      availability, #OMX_VC_UPPER_RIGHT
        BEQ      DiagDLUpperRightNotAvailable

        VLD1     dAbove0,[pSrcAbove]                    ;// [U7|U6|U5|U4|U3|U2|U1|U0]
        VDUP     dU7, dAbove0[7]                        ;// [U7|U7|U7|U7|U7|U7|U7|U7]
        VEXT     dAbove1, dAbove0, dU7, #1              ;// [U7|U7|U6|U5|U4|U3|U2|U1]
        VEXT     dAbove2, dAbove0, dU7, #2              ;// [U7|U7|U7|U6|U5|U4|U3|U2]
        B        DiagDLPredict4x4Store

DiagDLUpperRightNotAvailable
        ;// Only U0..U3 are read; U3 stands in for the missing upper-right samples
        VLD1     dAboveU32[1],[pSrcAbove]               ;// [U3|U2|U1|U0|-|-|-|-]
        VDUP     dU3, dAbove[7]                         ;// [U3 U3 U3 U3 U3 U3 U3 U3]

        VEXT     dAbove0, dAbove, dU3, #4               ;// [U3 U3 U3 U3 U3 U2 U1 U0]
        VEXT     dAbove1, dAbove, dU3, #5               ;// [U3 U3 U3 U3 U3 U3 U2 U1]
        VEXT     dAbove2, dAbove, dU3, #6               ;// [U3 U3 U3 U3 U3 U3 U3 U2]

DiagDLPredict4x4Store

        ;// ((a+c)>>1 + b + 1)>>1 equals (a+2*b+c+2)>>2 for 8-bit inputs
        VHADD    dTmp, dAbove0, dAbove2
        VRHADD   dTmp, dTmp, dAbove1                    ;// (a+2*b+c+2)>>2


        ;// Each row is the previous one shifted by one sample
        VST1     dTmpU32[0],[pDst],dstStep
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst],dstStep
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst],dstStep
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst]

        B        ExitPredict4x4                         ;// Branch to exit code


OMX_VC_4x4_DIAG_DR


        ;// Load U0,U1,U2,U3

        VLD1     dAboveU32[0],[pSrcAbove]               ;// [X|X|X|X|U3|U2|U1|U0]

        ;// Load UL,L0,L1,L2,L3                         ;// dLeft = [UL|L0|L1|L2|L3|X|X|X]
        VLD1     {dLeft[7]},[pSrcAboveLeft]
        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep
        ADD      pDst1,pDst,dstStep                     ;// overwrites pSrcAbove (already loaded)

        VLD1     {dLeft[6]},[pSrcLeft],srcStep          ;// pSrcLeft[0*leftStep]
        VLD1     {dLeft[5]},[pSrcTmp],srcStep           ;// pSrcLeft[1*leftStep]
        VLD1     {dLeft[4]},[pSrcLeft]                  ;// pSrcLeft[2*leftStep]
        VLD1     {dLeft[3]},[pSrcTmp]                   ;// pSrcLeft[3*leftStep]


        VEXT     dAbove0,dLeft,dAbove,#3                ;// [U2|U1|U0|UL|L0|L1|L2|L3]
        ADD      pDst2,pDst1,dstStep                    ;// overwrites leftStep (no longer needed)
        VEXT     dAbove1,dLeft,dAbove,#4                ;// [U3|U2|U1|U0|UL|L0|L1|L2]
        ADD      pDst3,pDst2,dstStep                    ;// overwrites predMode (no longer needed)
        VEXT     dAbove2,dLeft,dAbove,#5                ;// [ X|U3|U2|U1|U0|UL|L0|L1]

        VHADD    dTmp, dAbove0, dAbove2
        VRHADD   dTmp, dTmp, dAbove1                    ;// (a+2*b+c+2)>>2


        ;// Rows are written bottom-up, shifting one sample per row
        VST1     dTmpU32[0],[pDst3]                     ;// Store pTmp[0],[1],[2],[3] @ pDst3
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst2]                     ;// Store pTmp[1],[2],[3],[4] @ pDst2
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst1]                     ;// Store pTmp[2],[3],[4],[5] @ pDst1
        VEXT     dTmp,dTmp,dTmp,#1
        VST1     dTmpU32[0],[pDst]                      ;// Store pTmp[3],[4],[5],[6] @ pDst

        B        ExitPredict4x4                         ;// Branch to exit code

OMX_VC_4x4_VR


        ;// Load UL,U0,U1,U2,U3
        VLD1     dAboveU32[0],[pSrcAbove]
        VLD1     dAbove[7],[pSrcAboveLeft]              ;// [UL|X|X|X|U3|U2|U1|U0]

        ;// Load L0,L1,L2                               ;// dLeft0 = [L0|L2|X|X|X|X|X|X]
                                                        ;// dLeft1 = [L1| X|X|X|X|X|X|X]
        VLD1     {dLeft0[7]},[pSrcLeft],leftStep        ;// pSrcLeft[0*leftStep]
        VLD1     {dLeft1[7]},[pSrcLeft],leftStep        ;// pSrcLeft[1*leftStep]
        VLD1     {dLeft0[6]},[pSrcLeft]                 ;// pSrcLeft[2*leftStep]


        VEXT     dOdd2,dAbove,dAbove,#7                 ;// [ x x x U3 U2 U1 U0 UL ]
        VEXT     dEven0,dLeft0,dOdd2,#6                 ;// [ x x x U1 U0 UL L0 L2 ]
        VEXT     dEven1,dLeft1,dOdd2,#7                 ;// [ x x x U2 U1 U0 UL L1 ]
        VEXT     dEven2,dLeft0,dAbove,#7                ;// [ x x x U3 U2 U1 U0 L0 ]
        VEXT     dOdd0,dLeft1,dAbove,#7                 ;// [ x x x U3 U2 U1 U0 L1 ]
        VEXT     dOdd1,dLeft0,dOdd2,#7                  ;// [ x x x U2 U1 U0 UL L0 ]

        ;// From lane 1 upwards dOdd0 == dOdd2, so the 3-tap form below
        ;// reduces to the 2-tap average (a+b+1)>>1 in those lanes.
        VHADD    dTmp1, dOdd0, dOdd2
        VRHADD   dTmp1, dTmp1, dOdd1                    ;// Tmp[ x x x 9 7 5 3 1 ]

        VHADD    dTmp0, dEven0, dEven2
        VRHADD   dTmp0, dTmp0, dEven1                   ;// Tmp[ x x x 8 6 4 2 0 ]


        VEXT     dTmp3,dTmp1,dTmp1,#1                   ;// Tmp[ x x x x 9 7 5 3 ]
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VEXT     dTmp2,dTmp0,dTmp0,#1                   ;// Tmp[ x x x x 8 6 4 2 ]


        VST1     dTmp3U32[0],[pDst],dstep               ;// Tmp[9],[7],[5],[3]
        VST1     dTmp2U32[0],[pDstTmp],dstep            ;// Tmp[8],[6],[4],[2]
        VST1     dTmp1U32[0],[pDst],dstep               ;// Tmp[7],[5],[3],[1]
        VST1     dTmp0U32[0],[pDstTmp]                  ;// Tmp[6],[4],[2],[0]

        B        ExitPredict4x4                         ;// Branch to exit code

OMX_VC_4x4_HD


        ;// Load U0,U1,U2,U3.
        ;// Only dAbove[0..2] are consumed by the VEXTs below, so load just the
        ;// four samples directly above (as DIAG_DR and VR do) instead of eight;
        ;// this case does not test for upper-right availability.
        VLD1     dAboveU32[0],[pSrcAbove]               ;// dAbove = [X|X|X|X|U3|U2|U1|U0]

        ;// Load UL,L0,L1,L2,L3                         ;// dLeft = [UL|L0|L1|L2|L3|X|X|X]
        VLD1     {dLeft[7]},[pSrcAboveLeft]
        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep

        VLD1     {dLeft[6]},[pSrcLeft],srcStep          ;// pSrcLeft[0*leftStep]
        VLD1     {dLeft[5]},[pSrcTmp],srcStep           ;// pSrcLeft[1*leftStep]
        VLD1     {dLeft[4]},[pSrcLeft]                  ;// pSrcLeft[2*leftStep]
        VLD1     {dLeft[3]},[pSrcTmp]                   ;// pSrcLeft[3*leftStep]

        VEXT     dAbove0,dLeft,dAbove,#3                ;// [ U2|U1|U0|UL|L0|L1|L2|L3 ]
        VEXT     dAbove1,dLeft,dAbove,#2                ;// [ U1|U0|UL|L0|L1|L2|L3|X ]
        VEXT     dAbove2,dLeft,dAbove,#1                ;// [ U0|UL|L0|L1|L2|L3|X|X ]

        VHADD    dTmp0, dAbove0, dAbove2
        VRHADD   dTmp0, dTmp0, dAbove1                  ;// Tmp[ 0 | 1 | 2 | 4 | 6 | 8 | X | X ]


        VRHADD   dTmp1, dAbove1, dAbove0                ;// (a+b+1)>>1
        VSHL     dTmp1U64,dTmp1U64,#24                  ;// Tmp[ 3|5| 7 |9 | X | X | X | X ]


        VSHL     dTmpU64,dTmp0U64,#16                   ;// Tmp[ 2|4|6|8| X | X | X | X ]
        VZIP     dTmp1,dTmp                             ;// dTmp = [ 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 ]
        VEXT     dTmp0,dTmp0,dTmp0,#6                   ;// Tmp[ X| X| X| X| X| X| 0 | 1 ]
        VEXT     dTmp1,dTmp,dTmp0,#2                    ;// Tmp[ 0 | 1 | 2 | 3 | 4 | 5 | 6 |7 ]

        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep

        VST1     dTmp1U32[1],[pDst],dstep               ;// Store pTmp[0|1|2|3]
        VST1     dTmpU32[1],[pDstTmp],dstep             ;// Store pTmp[2|3|4|5]
        VST1     dTmp1U32[0],[pDst]                     ;// Store pTmp[4|5|6|7]
        VST1     dTmpU32[0],[pDstTmp]                   ;// Store pTmp[6|7|8|9]

        B        ExitPredict4x4                         ;// Branch to exit code

OMX_VC_4x4_VL


        TST      availability, #OMX_VC_UPPER_RIGHT
        BEQ      DiagVLUpperRightNotAvailable

        VLD1     dAbove0,[pSrcAbove]                    ;// [U7|U6|U5|U4|U3|U2|U1|U0]
        VEXT     dAbove1,dAbove0,dAbove0,#1             ;// [ X|U7|U6|U5|U4|U3|U2|U1]
        VEXT     dAbove2,dAbove1,dAbove1,#1             ;// [ X| X|U7|U6|U5|U4|U3|U2]

        B        DiagVLPredict4x4Store

DiagVLUpperRightNotAvailable
        ;// Only U0..U3 are read; U3 stands in for the missing upper-right samples
        VLD1     dAboveU32[1],[pSrcAbove]               ;// [U3|U2|U1|U0|-|-|-|-]
        VDUP     dU3, dAbove[7]                         ;// [U3 U3 U3 U3 U3 U3 U3 U3]

        VEXT     dAbove0, dAbove, dU3, #4               ;// [U3 U3 U3 U3 U3 U2 U1 U0]
        VEXT     dAbove1, dAbove, dU3, #5               ;// [U3 U3 U3 U3 U3 U3 U2 U1]
        VEXT     dAbove2, dAbove, dU3, #6               ;// [U3 U3 U3 U3 U3 U3 U3 U2]

DiagVLPredict4x4Store

        VRHADD   dTmp0, dAbove1, dAbove0                ;// (a+b+1)>>1
                                                        ;// Tmp[ X| X| X| 8| 6| 4| 2| 0 ]

        VHADD    dTmp3, dAbove0, dAbove2
        VRHADD   dTmp3, dTmp3, dAbove1                  ;// (a+2*b+c+2)>>2
                                                        ;// Tmp[ X| X| X| 9| 7| 5| 3| 1 ]

        VEXT     dTmp1,dTmp0,dTmp0,#1                   ;// Tmp[ X| X| X| X| 8| 6| 4| 2 ]
        ADD      pDstTmp, pDst, dstStep
        ADD      dstep, dstStep, dstStep
        VEXT     dTmp2,dTmp3,dTmp1,#1                   ;// Tmp[ X| X| X| X| 9| 7| 5| 3 ]

        VST1     dTmp0U32[0],[pDst],dstep               ;// Tmp[6],[4],[2],[0]
        VST1     dTmp3U32[0],[pDstTmp],dstep            ;// Tmp[7],[5],[3],[1]
        VST1     dTmp1U32[0],[pDst]                     ;// Tmp[8],[6],[4],[2]
        VST1     dTmp2U32[0],[pDstTmp]                  ;// Tmp[9],[7],[5],[3]

        B        ExitPredict4x4                         ;// Branch to exit code

OMX_VC_4x4_HU
        ADD      pSrcTmp, pSrcLeft, leftStep
        ADD      srcStep, leftStep, leftStep

        ;// Load Left Edge                              ;// [L3|L2|L1|L0|X|X|X|X]
        VLD1     {dLeft[4]},[pSrcLeft],srcStep          ;// pSrcLeft[0*leftStep]
        VLD1     {dLeft[5]},[pSrcTmp],srcStep           ;// pSrcLeft[1*leftStep]
        VLD1     {dLeft[6]},[pSrcLeft]                  ;// pSrcLeft[2*leftStep]
        VLD1     {dLeft[7]},[pSrcTmp]                   ;// pSrcLeft[3*leftStep]

        VDUP     dL3,dLeft[7]                           ;// [L3|L3|L3|L3|L3|L3|L3|L3]

        VEXT     dLeftHU0,dLeft,dL3,#4                  ;// [L3|L3|L3|L3|L3|L2|L1|L0]
        VEXT     dLeftHU1,dLeft,dL3,#5                  ;// [L3|L3|L3|L3|L3|L3|L2|L1]
        VEXT     dLeftHU2,dLeft,dL3,#6                  ;// [L3|L3|L3|L3|L3|L3|L3|L2]

        VHADD    dTmp0, dLeftHU0, dLeftHU2
        VRHADD   dTmp0, dTmp0, dLeftHU1                 ;// Tmp[ L3 | L3 | L3 | L3 | L3 | 5 | 3 | 1 ]

        VRHADD   dTmp1, dLeftHU1, dLeftHU0              ;// (a+b+1)>>1
                                                        ;// Tmp[ L3 | L3 | L3 | L3 | L3 | 4 | 2 | 0 ]

        VZIP     dTmp1,dTmp0                            ;// dTmp1 = Tmp[7| 6| 5| 4| 3| 2| 1| 0]
                                                        ;// dTmp0 = [L3|L3|L3|L3|L3|L3|L3|L3]


        ;// Each row starts two samples further along the interleaved sequence
        VST1     dTmp1U32[0],[pDst],dstStep             ;// [3|2|1|0]
        VEXT     dTmp1,dTmp1,dTmp1,#2
        VST1     dTmp1U32[0],[pDst],dstStep             ;// [5|4|3|2]
        VEXT     dTmp1,dTmp1,dTmp1,#2
        VST1     dTmp1U32[0],[pDst],dstStep             ;// [7|6|5|4]
        VST1     dTmp0U32[0],[pDst]                     ;// [9|8|7|6]

        ;// HU falls through into the common exit

ExitPredict4x4

        MOV      return,  #OMX_Sts_NoErr
        M_END

        ENDIF ;// CortexA8

        END
;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_4x4 ends
;//-----------------------------------------------------------------------------------------------