;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_DeblockingChroma_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

;// Shared headers.
;// NOTE(review): M_VARIANTS, M_START and M_END are not defined in this file;
;// presumably they are macros supplied by armCOMM_s.h - confirm.
        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8


    IF  CortexA8

;// ---------------------------------------------------------------------------
;// Register aliases for the chroma deblocking helpers in this file.
;//
;// Several aliases name the same physical register (or a half of a Q
;// register that is also aliased); each overlap is noted on the alias.
;// Reminder: Qn occupies D(2n) (low half) and D(2n+1) (high half).
;// ---------------------------------------------------------------------------

;// ARM core registers
pAlpha      RN 2            ;// address of the alpha byte (replicated into dAlpha)
pBeta       RN 3            ;// address of the beta byte (replicated into dBeta)

pThresholds RN 5            ;// address of the threshold bytes, stepped 4 bytes per call
pBS         RN 4            ;// not referenced by the routines in this file
bS3210      RN 6            ;// not referenced by the routines in this file

;// Pixels
dP_0        DN D4.U8
dP_1        DN D5.U8
dP_2        DN D6.U8
dP_3        DN D7.U8
dQ_0        DN D8.U8
dQ_1        DN D9.U8
dQ_2        DN D10.U8
dQ_3        DN D11.U8


;// Filtering Decision
dAlpha      DN D0.U8
dBeta       DN D2.U8

dFilt       DN D16.U8
dAqflg      DN D12.U8
dApflg      DN D17.U8       ;// same register as dDummy

dAp0q0      DN D13.U8       ;// same register as dHSp0q1 / dP_0t

;// bSLT4
dTC3210     DN D18.U8
dTCs        DN D31.S8       ;// signed view of D31
dTC         DN D31.U8       ;// unsigned view of the same D31

dMask_0     DN D14.U8
dMask_1     DN D15.U8
dMask_4     DN D26.U16

dTemp       DN D28.U8       ;// low half of Q14 (qP_0n); same register as dBS3210
dDummy      DN D17.U8       ;// same register as dApflg

;// Computing P0,Q0
qDq0p0      QN Q10.S16
qDp1q1      QN Q11.S16
qDelta      QN Q10.S16      ;// reuses qDq0p0
dDelta      DN D20.S8       ;// low half of Q10 (qDelta)


;// Computing P1,Q1
qP_0n       QN Q14.S16
qQ_0n       QN Q12.S16

dQ_0n       DN D24.U8       ;// low half of Q12 (qQ_0n)
dP_0n       DN D29.U8       ;// high half of Q14 (qP_0n)

;// bSGE4

dHSp0q1     DN D13.U8
dHSq0p1     DN D31.U8       ;// same register as dTC / dTCs

dBS3210     DN D28.U16      ;// same register as dTemp

dP_0t       DN D13.U8       ;// reuses dHSp0q1
dQ_0t       DN D31.U8       ;// reuses dHSq0p1

;// dP_0n (D29) and dQ_0n (D24) are defined once, in the section above.

;// Register usage for - armVCM4P10_DeblockingChromabSLT4_unsafe
;//
;// Computes new p0/q0 values for eight pixels across an edge:
;//     delta = clip(-tC, tC, (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
;//     P0    = clip(0, 255, p0 + delta)
;//     Q0    = clip(0, 255, q0 - delta)
;// where tC = (filt ? tC0 : dMask_0) + dMask_1 and each of the four tC0
;// bytes read from pThresholds is applied to two adjacent pixels.
;//
;// Lanes whose filt bits are clear still receive a computed value.
;// NOTE(review): the caller presumably selects between the original and the
;// filtered pixels using filt - confirm.
;//
;// Inputs  - Pixels            - p0-p1: D4-D5, q0-q1: D8-D9
;//         - Filter mask       - filt: D16
;//         - Constants         - dMask_0: D14, dMask_1: D15
;//                               (presumably 0 and 1 in every byte - confirm
;//                               against the caller)
;//         - Additional Params - pThresholds: r5, pAlpha: r2, pBeta: r3
;//
;// Outputs - Pixels            - P0: D29, Q0: D24
;//         - Thresholds        - alpha: D0, beta: D2 (the byte at pAlpha /
;//                               pBeta replicated into all eight lanes)
;//         - Additional Params - pThresholds: r5, advanced by 4 bytes
;//
;// Registers Corrupted         - D18-D31 (documented contract; the code below
;//                               writes D18, D20-D25, D28, D29 and D31)

        M_START armVCM4P10_DeblockingChromabSLT4_unsafe

        ;// Load four tC0 bytes into 32-bit lane 0 of D18 (dTC3210) and step
        ;// pThresholds past them.
        VLD1        d18.U32[0], [pThresholds]!

        ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3
        ;// is evaluated as
        ;// delta = (((p1-q1) >> 2) + (q0-p0) + 1) >> 1
        ;// The two forms give the same result: the two low bits dropped from
        ;// (p1-q1) contribute less than the rounding step can carry.
        ;// The tC set-up is interleaved with the delta arithmetic.
        VSUBL       qDp1q1, dP_1, dQ_1          ;// Q11 = p1 - q1
        VMOV        dTemp, dTC3210              ;// D28 = copy of the tC0 bytes
        VSUBL       qDq0p0, dQ_0, dP_0          ;// Q10 = q0 - p0
        VSHR        qDp1q1, qDp1q1, #2          ;// Q11 = (p1 - q1) >> 2
        VZIP.8      dTC3210, dTemp              ;// D18 = t0,t0,t1,t1,t2,t2,t3,t3

        ;// tC = (filt ? tC0 : dMask_0) + dMask_1
        VBIF        dTC3210, dMask_0, dFilt     ;// take dMask_0 bits where filt is 0
        VRHADD      qDelta, qDp1q1, qDq0p0      ;// Q10 = (Q11 + Q10 + 1) >> 1
        VADD        dTC, dTC3210, dMask_1       ;// D31 = tC
        VQMOVN      dDelta, qDelta              ;// D20 = delta saturated to S8

        ;// Clamp delta to [-tC, tC]. The alpha/beta loads are slotted in
        ;// between and take no part in the clamp.
        VLD1        {dAlpha[]}, [pAlpha]        ;// D0 = alpha in every lane
        VMIN        dDelta, dDelta, dTCs        ;// delta = min(delta, tC)
        VNEG        dTCs, dTCs                  ;// D31 = -tC
        VLD1        {dBeta[]}, [pBeta]          ;// D2 = beta in every lane
        VMAX        dDelta, dDelta, dTCs        ;// delta = max(delta, -tC)

        ;// P0 = clip(0, 255, p0 + delta)
        ;// Q0 = clip(0, 255, q0 - delta)
        VMOVL       qP_0n, dP_0                 ;// Q14 = p0 widened to 16 bits
        VMOVL       qQ_0n, dQ_0                 ;// Q12 = q0 widened to 16 bits

        VADDW       qP_0n, qP_0n, dDelta        ;// Q14 = p0 + delta
        VSUBW       qQ_0n, qQ_0n, dDelta        ;// Q12 = q0 - delta

        VQMOVUN     dP_0n, qP_0n                ;// D29 = P0, saturated to 0..255
        VQMOVUN     dQ_0n, qQ_0n                ;// D24 = Q0, saturated to 0..255

        M_END

;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe()
;//
;// Computes new p0/q0 values for eight pixels across an edge:
;//     P0 = (2*p1 + p0 + q1 + 2) >> 2
;//     Q0 = (2*q1 + q0 + p1 + 2) >> 2
;// Each result is built from a truncating halving add followed by a
;// rounding halving add, which yields the same value as the formula above.
;//
;// No filter mask is applied in this routine.
;// NOTE(review): the caller presumably selects between the original and the
;// filtered pixels - confirm.
;//
;// Inputs  - Pixels            - p0-p1: D4-D5, q0-q1: D8-D9
;//         - Additional Params - pThresholds: r5, pAlpha: r2, pBeta: r3
;//
;// Outputs - Pixels            - P0: D13, Q0: D31
;//         - Thresholds        - alpha: D0, beta: D2 (the byte at pAlpha /
;//                               pBeta replicated into all eight lanes)
;//         - Additional Params - pThresholds: r5, advanced by 4 bytes
;//
;// Registers written by the code below - D0, D2, D13, D31

        M_START armVCM4P10_DeblockingChromabSGE4_unsafe

        ;// First step: truncating averages of the outer pixel pairs.
        VHADD       dHSp0q1, dP_0, dQ_1         ;// D13 = (p0 + q1) >> 1
        VHADD       dHSq0p1, dQ_0, dP_1         ;// D31 = (q0 + p1) >> 1

        ;// Reload alpha/beta and step pThresholds by 4 bytes, the same
        ;// amount the bSLT4 routine consumes with its post-incremented load.
        ;// No threshold bytes are read here.
        VLD1        {dAlpha[]}, [pAlpha]        ;// D0 = alpha in every lane
        ADD         pThresholds, pThresholds, #4
        VLD1        {dBeta[]}, [pBeta]          ;// D2 = beta in every lane

        ;// Second step: rounding averages with the inner neighbour.
        VRHADD      dP_0t, dHSp0q1, dP_1        ;// D13 = (D13 + p1 + 1) >> 1 = P0
        VRHADD      dQ_0t, dHSq0p1, dQ_1        ;// D31 = (D31 + q1 + 1) >> 1 = Q0

        M_END

;// Close the conditional section opened by "IF CortexA8" and end the file.
        ENDIF

        END