;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

;//-----------------------------------------------------------------------------
;// omxVCM4P10_TransformDequantChromaDCFromPair
;//
;// Purpose:
;//   1. Unpacks a byte stream of (flag, value) entries into the four 16-bit
;//      slots at pDst (slots that are not named by any entry stay zero).
;//   2. Applies a 2x2 add/subtract butterfly to those four coefficients.
;//   3. Multiplies every result by
;//          armVCM4P10_VMatrixQPModTable[QP] << armVCM4P10_QPDivTable[QP]
;//      shifts the 32-bit product right by one, narrows it to 16 bits and
;//      writes the four results back to pDst.
;//
;// In:    r0 = ppSrc  address of the stream pointer (read, then updated)
;//        r1 = pDst   destination buffer; 8 bytes are written
;//        r2 = QP     byte index into both look-up tables
;// Out:   r0 = OMX_Sts_NoErr
;//        *ppSrc advanced past every byte consumed by the unpack loop
;// Uses:  r2-r9, D0-D3, flags
;//
;// NOTE: the routine performs no argument checking of its own.
;//
;// Flag byte layout, as decoded by unpackLoop:
;//   bits 3:0  index of the 16-bit destination slot
;//   bit  4    set   -> value is two bytes, low byte first
;//             clear -> value is one sign-extended byte
;//   bit  5    set   -> this is the final entry of the stream
;//-----------------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        IMPORT armVCM4P10_QPDivTable
        IMPORT armVCM4P10_VMatrixQPModTable

        M_VARIANTS CortexA8




    IF CortexA8

;//--------------------------------------
;// Flag byte constants
;//--------------------------------------
CDCPAIR_FLAG_WIDE   EQU 0x10        ;// Two-byte value follows the flag
CDCPAIR_FLAG_LAST   EQU 0x20        ;// Last (flag, value) entry
CDCPAIR_OFFSET_MASK EQU 31          ;// Applied to (Flag << 1)

;// ARM Registers
;//--------------------------------------
;// Declare input registers
;//--------------------------------------
ppSrc       RN 0
pDst        RN 1
QP          RN 2

;//--------------------------------
;// Scratch variable for Unpack2x2
;//--------------------------------
pSrc        RN 9
Value       RN 4
Value2      RN 5
Flag        RN 6
strOffset   RN 7
cstOffset   RN 8

;//--------------------------------
;// Scratch variable
;//--------------------------------
;// Several names below alias registers that are already in use above
;// (r2, r4, r5, r6, r9).  The instruction order in the routine relies on
;// each earlier value being dead before its alias is written.
r0w0        RN 3
r0w1        RN 4

c0w0        RN 5
c1w0        RN 6

return      RN 0
pQPDivTable RN 5
pQPModTable RN 6
Shift       RN 9
Scale       RN 2



;// Neon Registers

dZero       DN D0.U16
dInvTrCoeff DN D0.S16
dScale      DN D1.S16
qDqntCoeff  QN Q1.S32
dDqntCoeff  DN D2.S16


        ;// Write function header
        ;// NOTE(review): the "r9" argument presumably asks the macro to
        ;// preserve r4-r9; confirm against M_START in armCOMM_s.h.
        M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9

        LDR      pSrc, [ppSrc]                       ;// pSrc = *ppSrc
        VMOV     dZero, #0
        MOV      cstOffset, #CDCPAIR_OFFSET_MASK     ;// Loop-invariant mask for the store offset

        ;//-----------------------------------------------------------------------
        ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
        ;//-----------------------------------------------------------------------

        VST1     dZero,[pDst]                        ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
        LDRB     Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>


unpackLoop
        TST      Flag,  #CDCPAIR_FLAG_WIDE           ;// NE: two-byte value, EQ: one-byte value
        LDRSBNE  Value2,[pSrc,#1]                    ;// High byte, sign-extended
        LDRBNE   Value, [pSrc], #2                   ;// Low byte; byte loads avoid an unaligned access
        AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1; flags untouched
        LDRSBEQ  Value, [pSrc], #1                   ;// Value = sign-extended 8-bit *pSrc++
        ORRNE    Value,Value,Value2, LSL #8          ;// Value = low | (high << 8), a signed 16-bit value

        TST      Flag,  #CDCPAIR_FLAG_LAST           ;// EQ: more entries follow, NE: done
        LDRBEQ   Flag,  [pSrc], #1                   ;// Flag = *pSrc++ for the next iteration
        STRH     Value, [pDst, strOffset]            ;// Store <Value> at offset <strOffset>; flags untouched
        BEQ      unpackLoop                          ;// Branch to the loop beginning

        ;//--------------------------------------------------
        ;//InvTransformDC2x2: Inlined (Implemented in ARM V6)
        ;//--------------------------------------------------

        LDMIA    pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|

        ;// Must precede the load of <Shift>, which shares r9 with <pSrc>
        STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer

        LDR      pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
        LDR      pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer

        SADDSUBX r0w0, r0w0,  r0w0                   ;// top = c0+c1, bottom = c0-c1
        SADDSUBX r0w1, r0w1,  r0w1                   ;// top = c2+c3, bottom = c2-c3

        ;// Both table reads must complete before SADD16/SSUB16 below, because
        ;// c0w0 and c1w0 reuse the table-pointer registers r5 and r6.
        LDRSB    Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]
        LDRSB    Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP]; overwrites QP (r2)

        SADD16   c0w0,  r0w0, r0w1                   ;// top = c0+c1+c2+c3, bottom = c0-c1+c2-c3
        SSUB16   c1w0,  r0w0, r0w1                   ;// top = c0+c1-c2-c3, bottom = c0-c1-c2+c3

        ;//-------------------------------------------------
        ;//DequantChromaDC2x2: Inlined (Neon Implementation)
        ;//-------------------------------------------------

        LSL      Scale, Scale, Shift                 ;// Scale = Scale << Shift
        VMOV     dInvTrCoeff, c0w0, c1w0             ;// Low word = c0w0, high word = c1w0
        VREV32   dInvTrCoeff,dInvTrCoeff             ;// Swap the halfwords of each word: sum term first
        VDUP     dScale,Scale                        ;// Low 16 bits of Scale in every lane

        VMULL    qDqntCoeff,dInvTrCoeff,dScale       ;// 32-bit products
        VSHRN    dDqntCoeff,qDqntCoeff,#1            ;// (product >> 1), narrowed to 16 bits


        VST1     dDqntCoeff,[pDst]                   ;// Storing all the coefficients at once

        MOV      return, #OMX_Sts_NoErr
        M_END

    ENDIF ;// CortexA8


        END