;//
;//
;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Syntax: ARM armasm (RN/DN/QN register aliases, ';' starts a comment).
;// Target: ARM core with ARMv6 SIMD (SADD16/SSUB16/SADDSUBX) and NEON;
;//         only assembled when the CortexA8 variant is selected.
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        IMPORT armVCM4P10_QPDivTable
        IMPORT armVCM4P10_VMatrixQPModTable

        M_VARIANTS CortexA8



    IF CortexA8

;// ARM Registers
;//--------------------------------------
;// Declare input registers
;//--------------------------------------
ppSrc       RN 0        ;// in:  address of the pair-stream pointer (updated on exit)
pDst        RN 1        ;// in:  destination buffer, 4 x 16-bit coefficients
QP          RN 2        ;// in:  index into the two lookup tables (not range-checked)

;//--------------------------------
;// Scratch variable for Unpack2x2
;//--------------------------------
pSrc        RN 9        ;// running pointer into the pair stream
Value       RN 4        ;// coefficient value being unpacked
Value2      RN 5        ;// high byte of a 16-bit coefficient
Flag        RN 6        ;// pair header byte: index | 0x10 (16-bit) | 0x20 (last)
strOffset   RN 7        ;// byte offset of the coefficient inside pDst
cstOffset   RN 8        ;// constant mask used to derive strOffset

;//--------------------------------
;// Scratch variable
;//--------------------------------
;// NOTE: several aliases below share a physical register with the unpack
;// aliases above (r4: Value/r0w1, r5: Value2/c0w0/pQPDivTable,
;// r6: Flag/c1w0/pQPModTable, r9: pSrc/Shift, r2: QP/Scale, r0: ppSrc/return).
;// The instruction order in the function body is what keeps their live
;// ranges disjoint - do not reorder without re-checking every alias.
r0w0        RN 3        ;// row 0 of the 2x2 block, packed |c1|c0|
r0w1        RN 4        ;// row 1 of the 2x2 block, packed |c3|c2|

c0w0        RN 5        ;// packed sums of the two rows
c1w0        RN 6        ;// packed differences of the two rows

return      RN 0
pQPDivTable RN 5
pQPModTable RN 6
Shift       RN 9
Scale       RN 2

;//--------------------------------
;// Constants
;//--------------------------------
;// The destination holds exactly four halfwords (it is cleared, reloaded and
;// written back as a single 8-byte unit below), so a valid coefficient index
;// is 0..3 and a valid byte offset is 0, 2, 4 or 6.
DC2x2_OFFSET_MASK   EQU 6   ;// keeps (Flag << 1) inside the 8-byte block


;// Neon Registers

dZero       DN D0.U16
dInvTrCoeff DN D0.S16       ;// reuses D0 once the zero fill has been stored
dScale      DN D1.S16
qDqntCoeff  QN Q1.S32       ;// Q1 = D2:D3
dDqntCoeff  DN D2.S16       ;// narrowed result, overlaps the low half of Q1


;//-----------------------------------------------------------------------
;// omxVCM4P10_TransformDequantChromaDCFromPair
;//
;// Unpacks a stream of (header, value) pairs into a 2x2 block of 16-bit
;// coefficients, applies the 2x2 inverse (Hadamard-style) transform and
;// scales the result by the QP-derived factor.
;//
;// In:    r0 = ppSrc  pointer to the stream pointer; *ppSrc is advanced
;//                    past the last pair consumed
;//        r1 = pDst   4 x 16-bit output block; accessed with LDMIA, so it
;//                    must be at least word aligned
;//        r2 = QP     used directly as a byte index into
;//                    armVCM4P10_QPDivTable / armVCM4P10_VMatrixQPModTable
;// Out:   r0 = OMX_Sts_NoErr (always; no argument validation is done here)
;//        pDst[0..3] = ((transform result) * (Scale << Shift)) >> 1
;// Uses:  r2-r9, D0-D3, flags. The "r9" argument of M_START presumably asks
;//        the macro (armCOMM_s.h) to preserve registers up to r9 - confirm
;//        against the macro definition.
;//
;// Pair format as decoded by the loop below:
;//        header bits 0..  : coefficient index
;//        header bit 4     : set -> 16-bit little-endian value follows,
;//                           clear -> signed 8-bit value follows
;//        header bit 5     : set -> this is the last pair
;//-----------------------------------------------------------------------

        ;// Write function header
        M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9

        LDR     pSrc, [ppSrc]                       ;// pSrc = *ppSrc
        VMOV    dZero, #0
        ;// Offset mask for the unpack loop. The index field of the header is
        ;// restricted to two bits so that a malformed stream cannot make the
        ;// STRH below write outside the four-coefficient destination block.
        ;// Well-formed streams (index 0..3) are unaffected.
        MOV     cstOffset, #DC2x2_OFFSET_MASK

        ;//-----------------------------------------------------------------------
        ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
        ;//-----------------------------------------------------------------------

        VST1    dZero, [pDst]                       ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
        LDRB    Flag, [pSrc], #1                    ;// Preload <Flag> before <unpackLoop>


unpackLoop
        TST     Flag, #0x10                         ;// NE -> 16-bit value, EQ -> 8-bit value
        LDRSBNE Value2, [pSrc, #1]                  ;// high byte, sign-extended (read before pSrc moves)
        LDRBNE  Value, [pSrc], #2                   ;// low byte; byte loads avoid an unaligned halfword access
        AND     strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 3) << 1
        LDRSBEQ Value, [pSrc], #1                   ;// Value = (OMX_S8) *pSrc++
        ORRNE   Value, Value, Value2, LSL #8        ;// Value = low | (high << 8)

        TST     Flag, #0x20                         ;// EQ -> more pairs follow
        LDRBEQ  Flag, [pSrc], #1                    ;// Flag = *pSrc++ for the next iteration (flags unchanged)
        STRH    Value, [pDst, strOffset]            ;// Store low 16 bits of <Value> at <strOffset>
        BEQ     unpackLoop                          ;// still using the result of TST #0x20

        ;//--------------------------------------------------
        ;//InvTransformDC2x2: Inlined (Implemented in ARM V6)
        ;//--------------------------------------------------

        LDMIA   pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|

        STR     pSrc, [ppSrc]                       ;// Update the bitstream pointer (r9 is free after this)

        LDR     pQPDivTable, =armVCM4P10_QPDivTable         ;// QP Division look-up-table base pointer
        LDR     pQPModTable, =armVCM4P10_VMatrixQPModTable  ;// QP Modulo look-up-table base pointer

        ;// SADDSUBX is the pre-UAL spelling of SASX: top = top + bottom,
        ;// bottom = bottom - top of the (exchanged) second operand.
        SADDSUBX r0w0, r0w0, r0w0                   ;// [ c0+c1, c0-c1 ]
        SADDSUBX r0w1, r0w1, r0w1                   ;// [ c2+c3, c2-c3 ]

        ;// Shift must be loaded first: Scale shares r2 with QP.
        LDRSB   Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]
        LDRSB   Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP]

        ;// These overwrite r5/r6, i.e. the table pointers, which are dead now.
        SADD16  c0w0, r0w0, r0w1                    ;// [ c0+c1+c2+c3, c0-c1+c2-c3 ]
        SSUB16  c1w0, r0w0, r0w1                    ;// [ c0+c1-c2-c3, c0-c1-c2+c3 ]

        ;//-------------------------------------------------
        ;//DequantChromaDC2x2: Inlined (Neon Implementation)
        ;//-------------------------------------------------

        LSL     Scale, Scale, Shift                 ;// Scale = Scale << Shift
        VMOV    dInvTrCoeff, c0w0, c1w0             ;// D0 = { c0w0, c1w0 }
        VREV32  dInvTrCoeff, dInvTrCoeff            ;// swap halfwords in each word -> raster order
        VDUP    dScale, Scale                       ;// broadcast the scale to all four lanes

        VMULL   qDqntCoeff, dInvTrCoeff, dScale     ;// 32-bit products
        VSHRN   dDqntCoeff, qDqntCoeff, #1          ;// >> 1 and narrow back to 16 bits (truncating)


        VST1    dDqntCoeff, [pDst]                  ;// Storing all the coefficients at once

        MOV     return, #OMX_Sts_NoErr
        M_END

    ENDIF ;// CortexA8


        END