;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//
;//
;// Description:
;//   omxVCM4P10_TransformDequantChromaDCFromPair (CortexA8 variant, ARM + NEON).
;//
;//   1. Zeroes the four 16-bit coefficients at pDst and unpacks the
;//      position/value pair stream at *ppSrc into them.
;//   2. Applies the inlined 2x2 inverse DC transform (ARMv6 SIMD instructions).
;//   3. Multiplies every result by
;//          armVCM4P10_VMatrixQPModTable[QP] << armVCM4P10_QPDivTable[QP],
;//      shifts the 32-bit products right by one, narrows them to 16 bits and
;//      stores all four back to pDst.
;//
;// Inputs:
;//   r0 (ppSrc)  address of the pair-stream pointer; on exit the pointer has
;//               been advanced past every byte that was consumed
;//   r1 (pDst)   destination for the four 16-bit coefficients (8 bytes)
;//   r2 (QP)     byte index into both QP look-up tables
;//
;// Returns:
;//   r0 = OMX_Sts_NoErr, unconditionally (no argument checking is done here)
;//
;// Pair stream as consumed by <unpackLoop>:
;//   Flag byte : bits 0..3  coefficient position
;//               bit  4     (0x10) value is 16 bits, low byte first;
;//                          otherwise the value is one sign-extended byte
;//               bit  5     (0x20) this is the last pair
;//   Value     : 1 or 2 bytes, as selected by bit 4
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        IMPORT armVCM4P10_QPDivTable
        IMPORT armVCM4P10_VMatrixQPModTable

        M_VARIANTS CortexA8



    ;// Everything below is assembled only when the CortexA8 variant is selected
    IF CortexA8

;// ARM Registers
;//--------------------------------------
;// Declare input registers
;//--------------------------------------
ppSrc       RN 0
pDst        RN 1
QP          RN 2

;//--------------------------------
;// Scratch variable for Unpack2x2
;//--------------------------------
pSrc        RN 9
Value       RN 4
Value2      RN 5
Flag        RN 6
strOffset   RN 7
cstOffset   RN 8

;//--------------------------------
;// Scratch variable
;//--------------------------------
;// Several aliases below deliberately share a physical register with an
;// alias above; each pair is only safe because the earlier value is dead
;// before the later one is written:
;//   r2 : QP     -> Scale        r4 : Value  -> r0w1
;//   r5 : Value2 -> pQPDivTable -> c0w0
;//   r6 : Flag   -> pQPModTable -> c1w0
;//   r9 : pSrc   -> Shift        r0 : ppSrc  -> return
r0w0        RN  3
r0w1        RN  4

c0w0        RN  5
c1w0        RN  6

return      RN  0
pQPDivTable RN  5
pQPModTable    RN  6
Shift        RN  9
Scale        RN  2



;// Neon Registers
;// dZero and dInvTrCoeff are two typed views of D0;
;// dDqntCoeff (D2) is the low half of qDqntCoeff (Q1 = D3:D2).

dZero       DN  D0.U16
dInvTrCoeff DN  D0.S16
dScale      DN  D1.S16
qDqntCoeff  QN  Q1.S32
dDqntCoeff  DN  D2.S16


        ;// Write function header
        ;// NOTE(review): second argument presumably names the highest callee-saved
        ;// register the M_START/M_END macros must preserve (r4-r9 are all written
        ;// below) -- macro is defined in armCOMM_s.h, confirm there.
        M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9

        LDR     pSrc, [ppSrc]                        ;// Load pSrc
        VMOV    dZero, #0
        MOV     cstOffset, #31                       ;// Mask for ((Flag << 1) & 31) == ((Flag & 15) << 1)

        ;//-----------------------------------------------------------------------
        ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
        ;//-----------------------------------------------------------------------

        VST1    dZero,[pDst]                         ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
        LDRB     Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>

        ;// NOTE(review): strOffset below can be any even value in 0..30, but only
        ;// 8 bytes at pDst are zeroed above and reloaded after the loop. This
        ;// assumes the stream only ever carries positions 0..3 -- confirm against
        ;// the producer of the pair stream.

unpackLoop
        TST      Flag,  #0x10                        ;// Computing (Flag & 0x10): NE => 16-bit value follows
        LDRSBNE  Value2,[pSrc,#1]                    ;// 16-bit case: sign-extended high byte
        LDRBNE   Value, [pSrc], #2                   ;// 16-bit case: low byte; load byte wise to avoid unaligned access
        AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1  (byte offset into pDst)
        LDRSBEQ  Value, [pSrc], #1                   ;// 8-bit case:  Value = (OMX_S8) *pSrc++
        ORRNE    Value,Value,Value2, LSL #8          ;// 16-bit case: Value = low | (high << 8), i.e. a signed 16-bit value

        TST      Flag,  #0x20                        ;// Computing (Flag & 0x20) to check, if we're done
        LDRBEQ   Flag,  [pSrc], #1                   ;// Flag  = (OMX_U8) *pSrc++, for next iteration
        STRH     Value, [pDst, strOffset]            ;// Store <Value> at offset <strOffset> (flags untouched)
        BEQ      unpackLoop                          ;// Branch to the loop beginning

        ;//--------------------------------------------------
        ;//InvTransformDC2x2: Inlined (Implemented in ARM V6)
        ;//--------------------------------------------------

        LDMIA    pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|

        STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer (r9 is reused as <Shift> below)

        LDR      pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
        LDR      pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer

        SADDSUBX r0w0, r0w0,  r0w0                   ;// [ c00+c01, c00-c01 ]
        SADDSUBX r0w1, r0w1,  r0w1                   ;// [ c10+c11, c10-c11 ]

        ;// Shift has to be fetched first: <Scale> aliases <QP> (both r2), so the
        ;// second load destroys the table index.
        LDRSB    Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]
        LDRSB    Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP]

        ;// Both table pointers are dead now, so r5/r6 are reused for the results.
        SADD16   c0w0, r0w0, r0w1                    ;// [ d00+d10, d01+d11 ]
        SSUB16   c1w0, r0w0, r0w1                    ;// [ d00-d10, d01-d11 ]

        ;//-------------------------------------------------
        ;//DequantChromaDC2x2: Inlined (Neon Implementation)
        ;//-------------------------------------------------

        LSL      Scale, Scale, Shift                 ;// Scale = Scale << Shift
        VMOV     dInvTrCoeff, c0w0, c1w0             ;// D0 = c1w0:c0w0 (c0w0 in the low word)
        VREV32   dInvTrCoeff,dInvTrCoeff             ;// Swap halfwords per word: lanes = d00+d10, d01+d11, d00-d10, d01-d11
        VDUP     dScale,Scale                        ;// Low 16 bits of Scale in every lane

        VMULL    qDqntCoeff,dInvTrCoeff,dScale       ;// 32-bit products coeff * Scale
        VSHRN    dDqntCoeff,qDqntCoeff,#1            ;// (product >> 1) narrowed back to 16 bits


        VST1     dDqntCoeff,[pDst]                   ;// Storing all the coefficients at once

        MOV      return, #OMX_Sts_NoErr
        M_END

    ENDIF ;// CortexA8


    END