1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;//
17;//
18;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
19;// OpenMAX DL: v1.0.2
20;// Revision:   12290
21;// Date:       Wednesday, April 9, 2008
22;//
23;//
24;//
25;//
26
27
28        INCLUDE omxtypes_s.h
29        INCLUDE armCOMM_s.h
30
31        IMPORT armVCM4P10_QPDivTable
32        IMPORT armVCM4P10_VMatrixQPModTable
33
34        M_VARIANTS CortexA8
35
36
37
38
39    IF CortexA8
40
41;// ARM Registers
;//
;// Symbolic names for the core registers used by
;// omxVCM4P10_TransformDequantChromaDCFromPair.
;// Several names deliberately share one physical register; this works only
;// because the function uses them in disjoint phases:
;//   r0 : ppSrc  / return
;//   r2 : QP     / Scale
;//   r4 : Value  / r0w1
;//   r5 : Value2 / c0w0 / pQPDivTable
;//   r6 : Flag   / c1w0 / pQPModTable
;//   r9 : pSrc   / Shift
;// Reordering instructions in the function body must respect these overlaps.
42;//--------------------------------------
43;// Declare input registers
44;//--------------------------------------
45ppSrc       RN 0                                   ;// Address of the source pointer (read on entry, written back after unpack)
46pDst        RN 1                                   ;// Destination buffer of four 16-bit coefficients
47QP          RN 2                                   ;// Byte index into both QP look-up tables
48
49;//--------------------------------
50;// Scratch variable for Unpack2x2
51;//--------------------------------
52pSrc        RN 9                                   ;// Running source byte pointer (*ppSrc)
53Value       RN 4                                   ;// Coefficient value being unpacked
54Value2      RN 5                                   ;// Sign-extended high byte of a two-byte value
55Flag        RN 6                                   ;// Current flag byte of the packed stream
56strOffset   RN 7                                   ;// Byte offset into pDst for the current value
57cstOffset   RN 8                                   ;// Constant mask (31) used to derive strOffset
58
59;//--------------------------------
60;// Scratch variable
61;//--------------------------------
62r0w0        RN  3                                  ;// First word loaded from pDst (|c1|c0|)
63r0w1        RN  4                                  ;// Second word loaded from pDst (|c3|c2|); aliases Value
64
65c0w0        RN  5                                  ;// SADD16 result of the inverse transform; aliases Value2/pQPDivTable
66c1w0        RN  6                                  ;// SSUB16 result of the inverse transform; aliases Flag/pQPModTable
67
68return      RN  0                                  ;// Status code returned to the caller; aliases ppSrc
69pQPDivTable RN  5                                  ;// Base of armVCM4P10_QPDivTable
70pQPModTable    RN  6                               ;// Base of armVCM4P10_VMatrixQPModTable
71Shift        RN  9                                 ;// pQPDivTable[QP]; aliases pSrc (loaded after pSrc is stored back)
72Scale        RN  2                                 ;// pQPModTable[QP], later shifted left by Shift; aliases QP
73
74
75
76;// Neon Registers
;//
;// dZero and dInvTrCoeff are two typed views of D0 (zero source first, then
;// transform output). qDqntCoeff (Q1) covers D2/D3, so the narrowed result
;// dDqntCoeff (D2) is written over the low half of the widened product.
77
78dZero       DN  D0.U16                             ;// All-zero vector used to clear pDst
79dInvTrCoeff DN  D0.S16                             ;// Inverse-transformed coefficients (signed 16-bit lanes)
80dScale      DN  D1.S16                             ;// Scale replicated into every 16-bit lane
81qDqntCoeff  QN  Q1.S32                             ;// 32-bit products coefficient * Scale
82dDqntCoeff  DN  D2.S16                             ;// Products narrowed back to 16 bits after the >> 1
83
84
85        ;// Write function header
        ;//
        ;// omxVCM4P10_TransformDequantChromaDCFromPair
        ;//
        ;// Unpacks a packed flag/value stream into the four 16-bit coefficients
        ;// at pDst, applies an inlined 2x2 inverse transform to them and
        ;// dequantises the result in place.
        ;//
        ;// Stream entry layout, as decoded by <unpackLoop>:
        ;//   Flag byte : bits 0-3 = coefficient index
        ;//               bit 4 (0x10) = value is two bytes, low byte first
        ;//               bit 5 (0x20) = this is the last entry
        ;//   Value     : one sign-extended byte, or two bytes (see bit 4)
        ;//
        ;// Inputs : ppSrc (r0) - address of the stream pointer
        ;//          pDst  (r1) - destination for four 16-bit coefficients
        ;//          QP    (r2) - index into the QP div/mod look-up tables
        ;// Outputs: *ppSrc     - advanced past the bytes consumed
        ;//          pDst[0..3] - dequantised coefficients
        ;//          return (r0) = OMX_Sts_NoErr
        ;//
        ;// NOTE(review): no argument checking is visible here; presumably the
        ;// caller guarantees valid pointers, a word-aligned pDst (LDMIA is used
        ;// on it) and a QP inside the table range - confirm.
        ;// NOTE(review): the "r9" argument presumably asks M_START/M_END to
        ;// preserve r4-r9 - confirm against armCOMM_s.h.
86        M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9
87
88        LDR     pSrc, [ppSrc]                        ;// Load pSrc
89        VMOV    dZero, #0
90        MOV     cstOffset, #31                       ;// Mask used in the loop to compute the store offset
91
92        ;//-----------------------------------------------------------------------
93        ;// First, clear all four coefficients in the <pDst> buffer
94        ;//-----------------------------------------------------------------------
95
96        VST1    dZero,[pDst]                         ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
97        LDRB     Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>
98
99
        ;// Each iteration consumes one value (1 or 2 bytes) and, unless this
        ;// was the last entry, the flag byte of the next entry. The flags set
        ;// by each TST are consumed by the conditional instructions below it.
100unpackLoop
101        TST      Flag,  #0x10                        ;// Computing (Flag & 0x10): NE => two-byte value
102        LDRSBNE  Value2,[pSrc,#1]                    ;// Two-byte case: high byte, sign-extended
103        LDRBNE   Value, [pSrc], #2                   ;// Two-byte case: low byte; load byte wise to avoid unaligned access
104        AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1 (byte offset of a 16-bit slot)
105        LDRSBEQ  Value, [pSrc], #1                   ;// One-byte case: Value = sign-extended *pSrc++
106        ORRNE    Value,Value,Value2, LSL #8          ;// Two-byte case: Value = low | (high << 8)
107
108        TST      Flag,  #0x20                        ;// Computing (Flag & 0x20) to check, if we're done
109        LDRBEQ   Flag,  [pSrc], #1                   ;// Flag  = (OMX_U8) *pSrc++, for next iteration
110        STRH     Value, [pDst, strOffset]            ;// Store low 16 bits of <Value> at offset <strOffset>
111        BEQ      unpackLoop                          ;// Not the last entry: branch to the loop beginning
112
113        ;//--------------------------------------------------
114        ;//InvTransformDC2x2: Inlined (Implemented in ARM V6)
115        ;//--------------------------------------------------
116
117        LDMIA    pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|
118
119        STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer (r9 is reused as <Shift> below)
120
121        LDR      pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
122        LDR      pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
123
124        SADDSUBX r0w0, r0w0,  r0w0                   ;// [ c00+c01, c00-c01 ]
125        SADDSUBX r0w1, r0w1,  r0w1                   ;// [ c10+c11, c10-c11 ]
126
        ;// Both table reads must use QP before <Scale> (same register, r2)
        ;// is overwritten, and must precede the SADD16/SSUB16 that reuse
        ;// r5/r6 for their results.
127        LDRSB    Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]
128        LDRSB    Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP]
129
130        SADD16   c0w0, r0w0, r0w1                    ;// [ d00+d10, d01+d11 ]
131        SSUB16   c1w0, r0w0, r0w1                    ;// [ d00-d10, d01-d11 ]
132
133        ;//-------------------------------------------------
134        ;//DequantChromaDC2x2: Inlined (Neon Implementation)
135        ;//-------------------------------------------------
136
137        LSL      Scale, Scale, Shift                 ;// Scale = Scale << Shift
138        VMOV     dInvTrCoeff, c0w0, c1w0             ;// Low word = c0w0, high word = c1w0
139        VREV32   dInvTrCoeff,dInvTrCoeff             ;// Swap the two 16-bit halves of each word into store order
140        VDUP     dScale,Scale                        ;// Replicate Scale into every 16-bit lane
141
142        VMULL    qDqntCoeff,dInvTrCoeff,dScale       ;// Widening multiply: 32-bit product per coefficient
143        VSHRN    dDqntCoeff,qDqntCoeff,#1            ;// (coefficient * Scale) >> 1, narrowed to 16 bits
144
145
146        VST1     dDqntCoeff,[pDst]                   ;// Storing all the coefficients at once
147
148        MOV      return, #OMX_Sts_NoErr
149        M_END
150
151    ENDIF ;// CortexA8
152
153
154    END
155