omxVCM4P10_PredictIntra_16x16_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
;//
;//
;// File Name:  omxVCM4P10_PredictIntra_16x16_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

;//-------------------------------------------------------
;// Jump table that implements the C "switch (predMode)" in
;// assembly: predMode indexes this table of code addresses
;// and the selected entry is loaded straight into pc.
;//-------------------------------------------------------

        M_TABLE armVCM4P10_pIndexTable16x16
        DCD     OMX_VC_16X16_VERT, OMX_VC_16X16_HOR
        DCD     OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE

        IF ARM1136JS

;//--------------------------------------------
;// Constants
;//--------------------------------------------
BLK_SIZE        EQU 0x10
MUL_CONST0      EQU 0x01010101
MUL_CONST1      EQU 0x00060004
MUL_CONST2      EQU 0x00070005
MUL_CONST3      EQU 0x00030001
MASK_CONST      EQU 0x00FF00FF

;//--------------------------------------------
;// Scratch variable
;// (several names alias the same physical register;
;//  each alias is only live inside one prediction mode)
;//--------------------------------------------
y               RN 12
pc              RN 15

return          RN 0
innerCount      RN 0
outerCount      RN 1
pSrcLeft2       RN 1
pDst2           RN 2
sum             RN 6
pTable          RN 9
temp1           RN 10
temp2           RN 12
cMul1           RN 11
cMul2           RN 12
count           RN 12
dstStepx2       RN 11
leftStepx2      RN 14
r0x01010101     RN 10
r0x00FF00FF     RN 11

tVal0           RN 0
tVal1           RN 1
tVal2           RN 2
tVal3           RN 3
tVal4           RN 4
tVal5           RN 5
tVal6           RN 6
tVal7           RN 7
tVal8           RN 8
tVal9           RN 9
tVal10          RN 10
tVal11          RN 11
tVal12          RN 12
tVal14          RN 14

b               RN 12
c               RN 14

p2p0            RN 0
p3p1            RN 1
p6p4            RN 2
p7p5            RN 4
p10p8           RN 6
p11p9           RN 7
p14p12          RN 8
p15p13          RN 9

p3210           RN 10
p7654           RN 10
p111098         RN 10
p15141312       RN 10

;//--------------------------------------------
;// Declare input registers
;//--------------------------------------------
pSrcLeft        RN 0    ;// input pointer
pSrcAbove       RN 1    ;// input pointer
pSrcAboveLeft   RN 2    ;// input pointer
pDst            RN 3    ;// output pointer
leftStep        RN 4    ;// input variable
dstStep         RN 5    ;// input variable
predMode        RN 6    ;// input variable
availability    RN 7    ;// input variable

;//-----------------------------------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_16x16 starts
;//
;// Fills a 16x16 byte block at pDst with an intra prediction built from the
;// neighbouring samples, using one of four modes selected by predMode.
;//
;// In (registers):
;//   r0 = pSrcLeft       left neighbour column, consecutive rows leftStep bytes apart
;//   r1 = pSrcAbove      16 neighbour bytes of the row above
;//   r2 = pSrcAboveLeft  above-left neighbour byte (read in PLANE mode only)
;//   r3 = pDst           destination block, consecutive rows dstStep bytes apart
;// In (stack, loaded with M_LDR below):
;//   leftStep, dstStep, predMode, availability
;// Out:
;//   r0 = OMX_Sts_NoErr on every path
;//
;// predMode is used directly as an index into armVCM4P10_pIndexTable16x16;
;// it is not range-checked here.
;// availability is consulted in DC mode only (OMX_VC_UPPER / OMX_VC_LEFT bits).
;// NOTE(review): VERT and DC use LDM/STM on pSrcAbove/pDst and HOR/PLANE use
;// word STR on pDst, so these pointers and dstStep are presumably expected
;// to be word aligned -- confirm against the API contract.
;//-----------------------------------------------------------------------------------------------

        ;// Write function header
        M_START omxVCM4P10_PredictIntra_16x16, r11

        ;// Define stack arguments
        M_ARG   LeftStep,     4
        M_ARG   DstStep,      4
        M_ARG   PredMode,     4
        M_ARG   Availability, 4

        ;// M_STALL ARM1136JS=4

        LDR     pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case

        ;// Load argument from the stack
        M_LDR   predMode, PredMode                  ;// Arg predMode loaded from stack to reg
        M_LDR   leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
        M_LDR   dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
        M_LDR   availability, Availability          ;// Arg availability loaded from stack to reg

        MOV     y, #BLK_SIZE                        ;// Row counter used by the VERT loop
        LDR     pc, [pTable, predMode, LSL #2]      ;// Branch to the case selected by predMode

;//--------------------------------------------
;// Vertical: every output row is a copy of pSrcAbove[0..15]
;//--------------------------------------------
OMX_VC_16X16_VERT
        LDM     pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal 6 to 9 = pSrcAbove[0 to 15]
        ADD     dstStepx2, dstStep, dstStep         ;// dstStepx2 = 2 * dstStep
        ADD     pDst2, pDst, dstStep                ;// pDst2 = second row (pDst + dstStep)

        ;// M_STALL ARM1136JS=2                     ;// Stall outside the loop

LOOP_VERT
        STM     pDst, {tVal6,tVal7,tVal8,tVal9}     ;// even row: pDst[0 to 15] = tVal 6 to 9
        SUBS    y, y, #2                            ;// two rows written per iteration
        ADD     pDst, pDst, dstStepx2               ;// pDst advanced by two rows
        STM     pDst2, {tVal6,tVal7,tVal8,tVal9}    ;// odd row: pDst2[0 to 15] = tVal 6 to 9
        ADD     pDst2, pDst2, dstStepx2             ;// pDst2 advanced by two rows
        BNE     LOOP_VERT                           ;// Loop for 8 times
        MOV     return, #OMX_Sts_NoErr
        M_EXIT


;//--------------------------------------------
;// Horizontal: every output row is 16 copies of that row's left neighbour
;//--------------------------------------------
OMX_VC_16X16_HOR

        ;// M_STALL ARM1136JS=6

        LDR     r0x01010101, =MUL_CONST0            ;// Const to repeat the byte in reg 4 times
        MOV     y, #4                               ;// Outer Loop Count (4 rows per iteration)
        M_LDRB  tVal6, [pSrcLeft], +leftStep        ;// tVal6 = left byte of row 0
        ADD     pDst2, pDst, dstStep                ;// pDst2 = second row (pDst + dstStep)
        M_LDRB  tVal7, [pSrcLeft], +leftStep        ;// tVal7 = left byte of row 1
        ADD     dstStepx2, dstStep, dstStep         ;// double dstStep
        SUB     dstStepx2, dstStepx2, #12           ;// minus the 12 bytes already stepped by the three #+4 stores

LOOP_HOR
        M_LDRB  tVal8, [pSrcLeft], +leftStep        ;// tVal8 = left byte of row 4k+2
        MUL     tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
        M_LDRB  tVal9, [pSrcLeft], +leftStep        ;// tVal9 = left byte of row 4k+3
        MUL     tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
        SUBS    y, y, #1                            ;// flags stay live until the BEQ below (nothing in between sets them)
        STR     tVal6, [pDst],  #+4                 ;// store {tVal6} at pDst[0 to 3]
        STR     tVal7, [pDst2], #+4                 ;// store {tVal7} at pDst2[0 to 3]
        STR     tVal6, [pDst],  #+4                 ;// store {tVal6} at pDst[4 to 7]
        STR     tVal7, [pDst2], #+4                 ;// store {tVal7} at pDst2[4 to 7]
        MUL     tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
        STR     tVal6, [pDst],  #+4                 ;// store {tVal6} at pDst[8 to 11]
        STR     tVal7, [pDst2], #+4                 ;// store {tVal7} at pDst2[8 to 11]
        MUL     tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
        M_STR   tVal6, [pDst], dstStepx2            ;// store {tVal6} at pDst[12 to 15], step two rows down
        M_STR   tVal7, [pDst2], dstStepx2           ;// store {tVal7} at pDst2[12 to 15], step two rows down
        STR     tVal8, [pDst],  #+4                 ;// store {tVal8} at pDst[0 to 3]
        STR     tVal9, [pDst2], #+4                 ;// store {tVal9} at pDst2[0 to 3]
        STR     tVal8, [pDst],  #+4                 ;// store {tVal8} at pDst[4 to 7]
        STR     tVal9, [pDst2], #+4                 ;// store {tVal9} at pDst2[4 to 7]
        STR     tVal8, [pDst],  #+4                 ;// store {tVal8} at pDst[8 to 11]
        STR     tVal9, [pDst2], #+4                 ;// store {tVal9} at pDst2[8 to 11]
        M_STR   tVal8, [pDst], dstStepx2            ;// store {tVal8} at pDst[12 to 15], step two rows down
        M_STR   tVal9, [pDst2], dstStepx2           ;// store {tVal9} at pDst2[12 to 15], step two rows down

        ;// Leave before the preload on the final pass: loading here unconditionally
        ;// would read pSrcLeft[16*leftStep] and pSrcLeft[17*leftStep], which lie
        ;// beyond the 16 left neighbours this mode consumes.
        BEQ     END_HOR
        M_LDRB  tVal6, [pSrcLeft], +leftStep        ;// tVal6 = left byte of row 4k+4 (next pass)
        M_LDRB  tVal7, [pSrcLeft], +leftStep        ;// tVal7 = left byte of row 4k+5 (next pass)
        B       LOOP_HOR                            ;// Loop for 4 times
END_HOR
        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;//--------------------------------------------
;// DC: every output byte is the rounded mean of the available neighbours
;//   both available : (sumAbove + sumLeft + 16) >> 5
;//   one available  : (sum + 8) >> 4
;//   none available : 128
;//--------------------------------------------
OMX_VC_16X16_DC

        ;// M_STALL ARM1136JS=2

        MOV     count, #0                           ;// count = number of available neighbour sets
        TST     availability, #OMX_VC_UPPER         ;// if(availability & #OMX_VC_UPPER)
        BEQ     TST_LEFT                            ;// Jump to Left if not upper
        LDM     pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal 8 to 11 = pSrcAbove[0 to 15]
        ADD     count, count, #1                    ;// if upper inc count by 1

        ;// M_STALL ARM1136JS=2

        UXTB16  tVal2, tVal8                        ;// pSrcAbove[0, 2]
        UXTB16  tVal6, tVal9                        ;// pSrcAbove[4, 6]
        UADD16  tVal2, tVal2, tVal6                 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6]
        UXTB16  tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
        UXTB16  tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
        UADD16  tVal8, tVal8, tVal9                 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7]
        UADD16  tVal2, tVal2, tVal8                 ;// two halfword partial sums of pSrcAbove[0..7]

        UXTB16  tVal8, tVal10                       ;// pSrcAbove[8, 10]
        UXTB16  tVal9, tVal11                       ;// pSrcAbove[12, 14]
        UADD16  tVal8, tVal8, tVal9                 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14]
        UXTB16  tVal10, tVal10, ROR #8              ;// pSrcAbove[9, 11]
        UXTB16  tVal11, tVal11, ROR #8              ;// pSrcAbove[13, 15]
        UADD16  tVal10, tVal10, tVal11              ;// pSrcAbove[9, 11] + pSrcAbove[13, 15]
        UADD16  tVal8, tVal8, tVal10                ;// two halfword partial sums of pSrcAbove[8..15]

        UADD16  tVal2, tVal2, tVal8                 ;// two halfword partial sums of pSrcAbove[0..15]

        ;// M_STALL ARM1136JS=1

        ADD     tVal2, tVal2, tVal2, LSR #16        ;// low halfword = sum(pSrcAbove[0..15]); high halfword is junk

        ;// M_STALL ARM1136JS=1

        UXTH    sum, tVal2                          ;// Extract the lower half for result

TST_LEFT
        TST     availability, #OMX_VC_LEFT
        BEQ     TST_COUNT
        ADD     leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
        ADD     pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep (odd rows)

        ;// The left sum below overwrites sum (r6); the upper sum is kept in tVal2
        ;// and added back at TST_COUNT when both neighbours are available.
        M_LDRB  tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left byte, row 0
        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left byte, row 1
        M_LDRB  tVal10,[pSrcLeft],  +leftStepx2     ;// tVal10= left byte, row 2
        M_LDRB  tVal11,[pSrcLeft2], +leftStepx2     ;// tVal11= left byte, row 3
        ADD     tVal7, tVal8, tVal9                 ;// tVal7 = row0 + row1
        ADD     count, count, #1                    ;// Inc Counter if Left is available
        ADD     tVal6, tVal10, tVal11               ;// tVal6 = row2 + row3

        M_LDRB  tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left byte, row 4
        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left byte, row 5
        M_LDRB  tVal10,[pSrcLeft],  +leftStepx2     ;// tVal10= left byte, row 6
        M_LDRB  tVal11,[pSrcLeft2], +leftStepx2     ;// tVal11= left byte, row 7
        ADD     sum, tVal7, tVal6                   ;// sum = rows 0..3
        ADD     tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
        ADD     tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
        ADD     tVal7, tVal8, tVal10                ;// tVal7 = rows 4..7


        M_LDRB  tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left byte, row 8
        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left byte, row 9
        M_LDRB  tVal10,[pSrcLeft],  +leftStepx2     ;// tVal10= left byte, row 10
        M_LDRB  tVal11,[pSrcLeft2], +leftStepx2     ;// tVal11= left byte, row 11
        ADD     sum, sum, tVal7                     ;// sum = rows 0..7
        ADD     tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
        ADD     tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
        ADD     tVal7, tVal8, tVal10                ;// tVal7 = rows 8..11


        M_LDRB  tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left byte, row 12
        M_LDRB  tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left byte, row 13
        M_LDRB  tVal10,[pSrcLeft],  +leftStepx2     ;// tVal10= left byte, row 14
        M_LDRB  tVal11,[pSrcLeft2], +leftStepx2     ;// tVal11= left byte, row 15
        ADD     sum, sum, tVal7                     ;// sum = rows 0..11
        ADD     tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
        ADD     tVal10, tVal10, tVal11              ;// tVal10= tVal10 + tVal11
        ADD     tVal7, tVal8, tVal10                ;// tVal7 = rows 12..15
        ADD     sum, sum, tVal7                     ;// sum = rows 0..15

TST_COUNT
        CMP     count, #0                           ;// if(count == 0)
        MOVEQ   sum, #128                           ;// sum = 128 if(count == 0)
        BEQ     TST_COUNT0                          ;// if(count == 0)
        CMP     count, #1                           ;// if(count == 1); flags used by all EQ/NE ops below
        ADDEQ   sum, sum, #8                        ;// sum += 8 if(count == 1)
        ADDNE   sum, sum, tVal2                     ;// sum = sumleft + sumupper
        ADDNE   sum, sum, #16                       ;// sum += 16 if(count == 2)

        ;// M_STALL ARM1136JS=1

        UXTH    sum, sum                            ;// drop the junk high halfword brought in by tVal2

        ;// M_STALL ARM1136JS=1

        LSREQ   sum, sum, #4                        ;// sum >> 4 if(count == 1)

        ;// M_STALL ARM1136JS=1

        LSRNE   sum, sum, #5                        ;// sum >> 5 if(count == 2)

TST_COUNT0

        ;// M_STALL ARM1136JS=1

        ORR     sum, sum, sum, LSL #8               ;// sum replicated in two bytes

        ;// M_STALL ARM1136JS=1

        ORR     tVal6, sum, sum, LSL #16            ;// sum replicated in all bytes
        CPY     tVal7, tVal6                        ;// tVal7 = tVal6
        CPY     tVal8, tVal6                        ;// tVal8 = tVal6
        CPY     tVal9, tVal6                        ;// tVal9 = tVal6
        ADD     dstStepx2, dstStep, dstStep         ;// double dstStep
        ADD     pDst2, pDst, dstStep                ;// pDst2 = second row (pDst + dstStep)
        MOV     y, #BLK_SIZE                        ;// Outer Loop Count (y was reused as count above)

LOOP_DC
        STM     pDst, {tVal6,tVal7,tVal8,tVal9}     ;// even row: pDst[0 to 15] = tVal 6 to 9
        SUBS    y, y, #2                            ;// two rows written per iteration
        ADD     pDst, pDst, dstStepx2               ;// pDst advanced by two rows
        STM     pDst2, {tVal6,tVal7,tVal8,tVal9}    ;// odd row: pDst2[0 to 15] = tVal 6 to 9
        ADD     pDst2, pDst2, dstStepx2             ;// pDst2 advanced by two rows
        BNE     LOOP_DC                             ;// Loop for 8 times

        MOV     return, #OMX_Sts_NoErr
        M_EXIT

;//--------------------------------------------
;// Plane: pred[y][x] = clip255((d + b*x + c*y) >> 5) with
;//   a = 16 * (pSrcAbove[15] + left[15])
;//   H = sum_{k=1..8} k * (above[7+k] - above[7-k])   (above[-1] = *pSrcAboveLeft)
;//   V = sum_{k=1..8} k * (left[7+k]  - left[7-k])    (left[-1]  = *pSrcAboveLeft)
;//   b = (5*H + 32) >> 6,  c = (5*V + 32) >> 6
;//   d = a + 16 - 7*b - 7*c
;//--------------------------------------------
OMX_VC_16X16_PLANE

        ;// M_STALL ARM1136JS=3
        RSB     tVal14, leftStep, leftStep, LSL #4  ;// tVal14 = 15*leftStep

        ;// M_STALL ARM1136JS=2
        LDRB    tVal10, [pSrcLeft,  tVal14]         ;// tVal10 = pSrcLeft[15*leftStep]
        LDRB    tVal11, [pSrcAboveLeft]             ;// tVal11 = pSrcAboveLeft[0]
        LDRB    tVal12, [pSrcAbove, #15]            ;// tVal12 = pSrcAbove[15]

        ADD     tVal2,  tVal12,  tVal10             ;// tVal2  = pSrcAbove[15] + pSrcLeft[15*leftStep]
        SUB     tVal10, tVal10,  tVal11             ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
        SUB     tVal11, tVal12,  tVal11             ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
        MOV     tVal2,  tVal2,   LSL #4             ;// tVal2  = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])

        MOV     tVal11, tVal11, LSL #3              ;// H  = 8*(above[15] - aboveLeft)
        LDRB    tVal6, [pSrcAbove, #0]
        LDRB    tVal7, [pSrcAbove, #14]
        SUB     tVal8, tVal7, tVal6
        RSB     tVal8, tVal8, tVal8, LSL #3         ;// 7*(above[14] - above[0])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6, [pSrcAbove, #1]
        LDRB    tVal7, [pSrcAbove, #13]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal8, tVal8, tVal8
        ADD     tVal8, tVal8, tVal8, LSL #1         ;// 6*(above[13] - above[1])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6, [pSrcAbove, #2]
        LDRB    tVal7, [pSrcAbove, #12]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal8, tVal8, tVal8, LSL #2         ;// 5*(above[12] - above[2])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6, [pSrcAbove, #3]
        LDRB    tVal7, [pSrcAbove, #11]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal11, tVal11, tVal8, LSL #2       ;// + 4*(above[11] - above[3])
        LDRB    tVal6, [pSrcAbove, #4]
        LDRB    tVal7, [pSrcAbove, #10]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal8, tVal8, tVal8, LSL #1         ;// 3*(above[10] - above[4])
        ADD     tVal11, tVal11, tVal8
        LDRB    tVal6, [pSrcAbove, #5]
        LDRB    tVal7, [pSrcAbove, #9]
        SUB     tVal8, tVal7, tVal6
        ADD     tVal11, tVal11, tVal8, LSL #1       ;// + 2*(above[9] - above[5])
        LDRB    tVal6, [pSrcAbove, #6]
        LDRB    tVal7, [pSrcAbove, #8]
        SUB     tVal8, tVal7, tVal6                 ;// 1*(above[8] - above[6])
        ADD     tVal7, tVal11, tVal8                ;// tVal7 = H

        ADD     tVal2,  tVal2,   #16                ;// tVal2  = a + 16
        MOV     tVal1,  pSrcLeft                    ;// tVal1  = pSrcLeft (walks down; pSrcAbove no longer needed)
        SUB     tVal9,  tVal14,   leftStep          ;// tVal9  = 14*leftStep
        ADD     tVal9,  pSrcLeft, tVal9             ;// tVal9  = pSrcLeft + 14*leftStep (walks up)

        M_LDRB  tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[14*leftStep]
        M_LDRB  tVal11, [tVal1], +leftStep          ;// tVal11 = pSrcLeft[0]
        ADD     tVal7,  tVal7,  tVal7,  LSL #2      ;// tVal7  = 5 * H
        ADD     tVal7,  tVal7,  #32                 ;// tVal7  = 5 * H + 32
        SUB     tVal8,  tVal8,  tVal11              ;// tVal8  = pSrcLeft[14*leftStep] - pSrcLeft[0]
        ASR     tVal12, tVal7,  #6                  ;// tVal12 = b = (5 * H + 32) >> 6

        RSB     tVal8,  tVal8,  tVal8,  LSL #3      ;// tVal8  = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0])
        ADD     tVal6,  tVal8,  tVal10, LSL #3      ;// tVal6  = V = 8*V0 + V1
        M_LDRB  tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[13*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[leftStep]
        RSB     tVal7,  tVal12, tVal12, LSL #3      ;// tVal7  = 7*b
        SUB     tVal2,  tVal2,  tVal7               ;// tVal2  = a + 16 - 7*b
        SUB     tVal7,  tVal8,  tVal10              ;// tVal7  = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[12*leftStep]
        ADD     tVal7,  tVal7,  tVal7               ;// tVal7  = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
        M_LDRB  tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[2*leftStep]
        ADD     tVal7,  tVal7,  tVal7,  LSL #1      ;// tVal7  = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
        ADD     tVal6,  tVal6,  tVal7               ;// tVal6  = V = V + V2
        SUB     tVal7,  tVal8,  tVal10              ;// tVal7  = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[11*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[3*leftStep]
        ADD     tVal7,  tVal7,  tVal7,  LSL #2      ;// tVal7  = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
        ADD     tVal6,  tVal6,  tVal7               ;// tVal6  = V = V + V3
        SUB     tVal7,  tVal8,  tVal10              ;// tVal7  = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[10*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[4*leftStep]
        ADD     tVal6,  tVal6,  tVal7,  LSL #2      ;// tVal6  = V = V + V4
        SUB     dstStep, dstStep, #16               ;// dstStep -= 16: each row's four STRs already advance pDst by 16
        SUB     tVal7,  tVal8,  tVal10              ;// tVal7  = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[9*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[5*leftStep]
        ADD     tVal7,  tVal7,  tVal7,  LSL #1      ;// tVal7  = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
        ADD     tVal6,  tVal6,  tVal7               ;// tVal6  = V = V + V5
        SUB     tVal7,  tVal8,  tVal10              ;// tVal7  = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
        M_LDRB  tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[8*leftStep]
        M_LDRB  tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[6*leftStep]
        ADD     tVal6,  tVal6,  tVal7,  LSL #1      ;// tVal6  = V = V + V6

        ;// M_STALL ARM1136JS=1
        SUB     tVal7,  tVal8,  tVal10              ;// tVal7  = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
        ADD     tVal6,  tVal6,  tVal7               ;// tVal6  = V = V + V7

        ;// M_STALL ARM1136JS=1
        ADD     tVal6,  tVal6,  tVal6,  LSL #2      ;// tVal6  = 5*V
        ADD     tVal6,  tVal6,  #32                 ;// tVal6  = 5*V + 32

        ;// M_STALL ARM1136JS=1
        ASR     tVal14, tVal6,  #6                  ;// tVal14 = c = (5*V + 32)>>6

        ;// M_STALL ARM1136JS=1
        RSB     tVal6,  tVal14, tVal14, LSL #3      ;// tVal6  = 7*c (uses signed c, before it is packed)
        UXTH    tVal14, tVal14                      ;// tVal14 = c with the upper halfword cleared
        ADD     tVal10, tVal12, tVal12              ;// tVal10 = 2*b
        ORR     tVal14, tVal14, tVal14, LSL #16     ;// tVal14 = {c  ,  c}
        SUB     tVal6,  tVal2,  tVal6               ;// tVal6  = d = a - 7*b - 7*c + 16
        ADD     tVal1,  tVal6,  tVal10              ;// tVal1  = pp2 = d + 2*b
        ADD     tVal10, tVal10, tVal12              ;// tVal10 = 3*b
        ;// d may be negative; a plain ORR would smear its sign-extension bits over
        ;// the p2 halfword, so take only the bottom halfword of d via PKHBT.
        PKHBT   tVal0,  tVal6,  tVal1,  LSL #16     ;// tVal0  = p2p0 = pack {p2, p0}
        UXTH    tVal12, tVal12                      ;// tVal12 = b with the upper halfword cleared
        UXTH    tVal10, tVal10                      ;// tVal10 = 3*b with the upper halfword cleared
        ORR     tVal12, tVal12, tVal12, LSL #16     ;// tVal12 = {b  ,  b}
        ORR     tVal10, tVal10, tVal10, LSL #16     ;// tVal10 = {3b , 3b}
        SADD16  tVal1,  tVal0,  tVal12              ;// tVal1  = p3p1   = p2p0   + {b,b}
        SADD16  tVal2,  tVal1,  tVal10              ;// tVal2  = p6p4   = p3p1   + {3b,3b}
        SADD16  tVal4,  tVal2,  tVal12              ;// tVal4  = p7p5   = p6p4   + {b,b}
        SADD16  tVal6,  tVal4,  tVal10              ;// tVal6  = p10p8  = p7p5   + {3b,3b}
        SADD16  tVal7,  tVal6,  tVal12              ;// tVal7  = p11p9  = p10p8  + {b,b}
        SADD16  tVal8,  tVal7,  tVal10              ;// tVal8  = p14p12 = p11p9  + {3b,3b}
        SADD16  tVal9,  tVal8,  tVal12              ;// tVal9  = p15p13 = p14p12 + {b,b}
        LDR     r0x00FF00FF,    =MASK_CONST         ;// r0x00FF00FF = 0x00FF00FF (top 4 bits double as row counter)

        ;// Per row: saturate each halfword lane to 13 bits unsigned, shift right by 5
        ;// to get the 8-bit sample, interleave even/odd lanes into 4 bytes, store,
        ;// then add c to every lane for the next row.
LOOP_PLANE

        USAT16  temp2, #13, p3p1
        USAT16  temp1, #13, p2p0
        SADD16  p3p1,  p3p1,  c
        SADD16  p2p0,  p2p0,  c
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [pDst], #4                   ;// pDst[0 to 3]

        USAT16  temp2, #13, p7p5
        USAT16  temp1, #13, p6p4
        SADD16  p7p5,  p7p5,  c
        SADD16  p6p4,  p6p4,  c
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [pDst], #4                   ;// pDst[4 to 7]

        USAT16  temp2, #13, p11p9
        USAT16  temp1, #13, p10p8
        SADD16  p11p9, p11p9, c
        SADD16  p10p8, p10p8, c
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [pDst], #4                   ;// pDst[8 to 11]

        USAT16  temp2, #13, p15p13
        USAT16  temp1, #13, p14p12
        SADD16  p15p13, p15p13, c
        SADD16  p14p12, p14p12, c
        AND     temp2, r0x00FF00FF, temp2, ASR #5
        AND     temp1, r0x00FF00FF, temp1, ASR #5
        ORR     temp1, temp1, temp2, LSL #8
        STR     temp1, [pDst], #4                   ;// pDst[12 to 15]

        ADDS    r0x00FF00FF, r0x00FF00FF, #1<<28    ;// Loop counter value in top 4 bits; carry out on the 16th add

        ADD     pDst, pDst, dstStep                 ;// next row (dstStep already reduced by 16); does not touch flags

        BCC     LOOP_PLANE                          ;// Loop for 16 times
        MOV     return, #OMX_Sts_NoErr
        M_END

        ENDIF ;// ARM1136JS


        END
;-----------------------------------------------------------------------------------------------
; omxVCM4P10_PredictIntra_16x16 ends
;-----------------------------------------------------------------------------------------------