10c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Copyright (C) 2007-2008 ARM Limited
378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Licensed under the Apache License, Version 2.0 (the "License");
578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// you may not use this file except in compliance with the License.
678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// You may obtain a copy of the License at
778e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
878e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//      http://www.apache.org/licenses/LICENSE-2.0
978e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// Unless required by applicable law or agreed to in writing, software
1178e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// distributed under the License is distributed on an "AS IS" BASIS,
1278e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1378e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// See the License for the specific language governing permissions and
1478e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;// limitations under the License.
1578e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
1678e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar;//
170c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
180c1bc742181ded4930842b46e9507372f0b1b963James Dong;// File Name:  armVCM4P10_DeblockingLuma_unsafe_s.s
190c1bc742181ded4930842b46e9507372f0b1b963James Dong;// OpenMAX DL: v1.0.2
200c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Revision:   9641
210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Date:       Thursday, February 7, 2008
220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
230c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
260c1bc742181ded4930842b46e9507372f0b1b963James Dong
270c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE omxtypes_s.h
280c1bc742181ded4930842b46e9507372f0b1b963James Dong        INCLUDE armCOMM_s.h
290c1bc742181ded4930842b46e9507372f0b1b963James Dong
300c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_VARIANTS ARM1136JS
310c1bc742181ded4930842b46e9507372f0b1b963James Dong
320c1bc742181ded4930842b46e9507372f0b1b963James Dong
330c1bc742181ded4930842b46e9507372f0b1b963James Dong
340c1bc742181ded4930842b46e9507372f0b1b963James Dong    IF  ARM1136JS
350c1bc742181ded4930842b46e9507372f0b1b963James Dong
360c1bc742181ded4930842b46e9507372f0b1b963James DongMASK_1  EQU 0x01010101    ;// 0x01 in each of the four byte lanes
370c1bc742181ded4930842b46e9507372f0b1b963James Dong
380c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare input registers
;// NOTE: the names below heavily alias physical registers (e.g. r14 is beta, bS,
;// tC0 and t9), so a name is only meaningful until one of its aliases is written.
;// pQ0, StepArg, tC0Arg, beta, bS, ptC0, p_3 and q_3 are declared here but are not
;// referenced by the two routines in this file.
390c1bc742181ded4930842b46e9507372f0b1b963James Dong
400c1bc742181ded4930842b46e9507372f0b1b963James DongpQ0        RN 0
410c1bc742181ded4930842b46e9507372f0b1b963James DongStepArg    RN 1
420c1bc742181ded4930842b46e9507372f0b1b963James DongtC0Arg     RN 2
430c1bc742181ded4930842b46e9507372f0b1b963James Dongalpha      RN 6    ;// read by the bSGE4 routine only
440c1bc742181ded4930842b46e9507372f0b1b963James Dong
450c1bc742181ded4930842b46e9507372f0b1b963James Dongbeta       RN 14
460c1bc742181ded4930842b46e9507372f0b1b963James DongbS         RN 14
470c1bc742181ded4930842b46e9507372f0b1b963James DongtC0        RN 14    ;// built by the MUL at the top of the bSLT4 routine
480c1bc742181ded4930842b46e9507372f0b1b963James DongptC0       RN 1
490c1bc742181ded4930842b46e9507372f0b1b963James Dong
500c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Declare Local/Temporary variables
510c1bc742181ded4930842b46e9507372f0b1b963James Dong
520c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Pixels
530c1bc742181ded4930842b46e9507372f0b1b963James Dongp_0     RN 3
540c1bc742181ded4930842b46e9507372f0b1b963James Dongp_1     RN 5
550c1bc742181ded4930842b46e9507372f0b1b963James Dongp_2     RN 4
560c1bc742181ded4930842b46e9507372f0b1b963James Dongp_3     RN 2
570c1bc742181ded4930842b46e9507372f0b1b963James Dongq_0     RN 8
580c1bc742181ded4930842b46e9507372f0b1b963James Dongq_1     RN 9
590c1bc742181ded4930842b46e9507372f0b1b963James Dongq_2     RN 10
600c1bc742181ded4930842b46e9507372f0b1b963James Dongq_3     RN 12
610c1bc742181ded4930842b46e9507372f0b1b963James Dong
620c1bc742181ded4930842b46e9507372f0b1b963James Dong
630c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Filtering
640c1bc742181ded4930842b46e9507372f0b1b963James Dong
650c1bc742181ded4930842b46e9507372f0b1b963James Dongap0q0   RN 1    ;// compared against (alpha>>2)+2 in the bSGE4 routine
660c1bc742181ded4930842b46e9507372f0b1b963James Dongfilt    RN 2    ;// per-lane enable: lanes with filt >= 1 take the filtered value
670c1bc742181ded4930842b46e9507372f0b1b963James Dong
680c1bc742181ded4930842b46e9507372f0b1b963James Dongm00     RN 7    ;// zero constant (set with MOV #0 in bSLT4; an input to bSGE4)
690c1bc742181ded4930842b46e9507372f0b1b963James Dongm01     RN 11   ;// MASK_1 constant (reloaded with LDR =MASK_1 in bSLT4)
700c1bc742181ded4930842b46e9507372f0b1b963James Dong
710c1bc742181ded4930842b46e9507372f0b1b963James Dongapflg   RN 0
720c1bc742181ded4930842b46e9507372f0b1b963James Dongaqflg   RN 6
730c1bc742181ded4930842b46e9507372f0b1b963James Dong
740c1bc742181ded4930842b46e9507372f0b1b963James DongtC      RN 1
750c1bc742181ded4930842b46e9507372f0b1b963James Dong
760c1bc742181ded4930842b46e9507372f0b1b963James Dong
770c1bc742181ded4930842b46e9507372f0b1b963James Dong;//Declarations for bSLT4 kernel
780c1bc742181ded4930842b46e9507372f0b1b963James Dong
790c1bc742181ded4930842b46e9507372f0b1b963James Dongpos     RN 7    ;// same register as m00, t2 and Q0a
800c1bc742181ded4930842b46e9507372f0b1b963James Dongneg     RN 12   ;// same register as t1 and max
810c1bc742181ded4930842b46e9507372f0b1b963James Dong
820c1bc742181ded4930842b46e9507372f0b1b963James DongP0a     RN 1
830c1bc742181ded4930842b46e9507372f0b1b963James DongP1a     RN 8    ;// same register as q_0
840c1bc742181ded4930842b46e9507372f0b1b963James DongQ0a     RN 7
850c1bc742181ded4930842b46e9507372f0b1b963James DongQ1a     RN 4    ;// same register as p_2 and t4
860c1bc742181ded4930842b46e9507372f0b1b963James Dong
870c1bc742181ded4930842b46e9507372f0b1b963James Dongu1      RN 3    ;// same register as p_0 and t0
880c1bc742181ded4930842b46e9507372f0b1b963James Dongmax     RN 12
890c1bc742181ded4930842b46e9507372f0b1b963James Dongmin     RN 2    ;// same register as filt
900c1bc742181ded4930842b46e9507372f0b1b963James Dong
910c1bc742181ded4930842b46e9507372f0b1b963James Dong
920c1bc742181ded4930842b46e9507372f0b1b963James Dong
930c1bc742181ded4930842b46e9507372f0b1b963James Dong;//Declarations for bSGE4 kernel
940c1bc742181ded4930842b46e9507372f0b1b963James Dong
950c1bc742181ded4930842b46e9507372f0b1b963James Dongq_3b    RN 9    ;// same register as q_1, t11 and Q0b
960c1bc742181ded4930842b46e9507372f0b1b963James Dongp_3b    RN 0    ;// same register as apflg, a and Q1b
970c1bc742181ded4930842b46e9507372f0b1b963James Dongapqflg  RN 12   ;// combined ap/aq flags produced at the top of the bSGE4 routine
980c1bc742181ded4930842b46e9507372f0b1b963James Dong
990c1bc742181ded4930842b46e9507372f0b1b963James DongP0b     RN 6    ;// same register as alpha and t8
1000c1bc742181ded4930842b46e9507372f0b1b963James DongP1b     RN 7
1010c1bc742181ded4930842b46e9507372f0b1b963James DongP2b     RN 1    ;// same register as ap0q0 and t5
1020c1bc742181ded4930842b46e9507372f0b1b963James Dong
1030c1bc742181ded4930842b46e9507372f0b1b963James DongQ0b     RN 9
1040c1bc742181ded4930842b46e9507372f0b1b963James DongQ1b     RN 0
1050c1bc742181ded4930842b46e9507372f0b1b963James DongQ2b     RN 2    ;// same register as filt
1060c1bc742181ded4930842b46e9507372f0b1b963James Dong
1070c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Miscellaneous
1080c1bc742181ded4930842b46e9507372f0b1b963James Dong
1090c1bc742181ded4930842b46e9507372f0b1b963James Donga       RN 0
1100c1bc742181ded4930842b46e9507372f0b1b963James Dongt0      RN 3
1110c1bc742181ded4930842b46e9507372f0b1b963James Dongt1      RN 12
1120c1bc742181ded4930842b46e9507372f0b1b963James Dongt2      RN 7
1130c1bc742181ded4930842b46e9507372f0b1b963James Dongt3      RN 11   ;// same register as m01: writing t3 destroys the MASK_1 constant
1140c1bc742181ded4930842b46e9507372f0b1b963James Dongt4      RN 4
1150c1bc742181ded4930842b46e9507372f0b1b963James Dongt5      RN 1
1160c1bc742181ded4930842b46e9507372f0b1b963James Dongt8      RN 6
1170c1bc742181ded4930842b46e9507372f0b1b963James Dongt9      RN 14
1180c1bc742181ded4930842b46e9507372f0b1b963James Dongt10     RN 5    ;// same register as p_1
1190c1bc742181ded4930842b46e9507372f0b1b963James Dongt11     RN 9
1200c1bc742181ded4930842b46e9507372f0b1b963James Dong
1210c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe()
1220c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
;// Filter kernel for the bSLT4 case. It works on four pixels at once, one per byte
;// lane, using the ARMv6 byte-wise SIMD instructions. USUB8/SSUB8 set the per-lane
;// GE flags and the following SEL picks lane-by-lane between its two operands
;// (first operand where GE is set), so the relative order of those instructions
;// must be preserved. The arithmetic notes below are per byte lane.
;//
1230c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
1240c1bc742181ded4930842b46e9507372f0b1b963James Dong;//        - 2 - filt, 0 - apflg, 6 - aqflg
1250c1bc742181ded4930842b46e9507372f0b1b963James Dong;//        - 11 - m01, 7 - tC0 (multiplied by m01 into r14 on entry; r7 is then reused)
1260c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
1270c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Outputs - 1,8,7,11 - Output Pixels(P0a,P1a,Q0a,Q1a); Q1a is built in r4 and the final SEL writes it to r11
1280c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
1290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers Corrupted - 0-3,5-12,14
;// NOTE(review): r4 is also written below (as t4 and Q1a) although it is not in
;// the list above - confirm that callers do not rely on r4 (p_2) surviving.
1300c1bc742181ded4930842b46e9507372f0b1b963James Dong
1310c1bc742181ded4930842b46e9507372f0b1b963James Dong
1320c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START armVCM4P10_DeblockingLumabSLT4_unsafe, lr    ;// macro from armCOMM_s.h; presumably saves registers up to lr - confirm
1330c1bc742181ded4930842b46e9507372f0b1b963James Dong
1340c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Since beta <= 18 and alpha <= 255 we know
1350c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// -254 <= p0-q0 <= 254
1360c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//  -17 <= q1-q0 <= 17
1370c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//  -17 <= p1-p0 <= 17
1380c1bc742181ded4930842b46e9507372f0b1b963James Dong
1390c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
1400c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//
1410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
1420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
1430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
1440c1bc742181ded4930842b46e9507372f0b1b963James Dong
1450c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t1, p_1, p_0    ;// t1 = p1 - p0
1460c1bc742181ded4930842b46e9507372f0b1b963James Dong        MUL     tC0, t2, m01    ;// tC0 = r7 * m01 (replicates a single byte in r7 across all lanes when m01 = MASK_1)
1470c1bc742181ded4930842b46e9507372f0b1b963James Dong
1480c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t2, q_1, q_0    ;// t2 = q1 - q0
1490c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB8   t1, t1, t2    ;// t1 = (p1-p0) - (q1-q0)
1500c1bc742181ded4930842b46e9507372f0b1b963James Dong
1510c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t2, p_0, q_0    ;// t2 = p0 - q0 (low 8 bits)
1520c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND     t2, t2, m01    ;// t2 = low bit of (p0-q0)
1530c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB8  t1, t1, t2    ;// t1 = (t1 - t2) >> 1
1540c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8  t5, p_0, q_0    ;// t5 = (p0 - q0) >> 1
1550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB8   t1, t1, t2    ;// t1 = t1 - t2
1560c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB8  t1, t1, t5    ;// t1 = (t1 - t5) >> 1
1570c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     m00, #0    ;// m00 = 0 (r7 is no longer needed as t2)
1580c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t1, t1, m01    ;// t1 = t1 + 1
1590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB8  t1, t1, t5    ;// t1 = (t1 - t5) >> 1, which is A from the derivation above
1600c1bc742181ded4930842b46e9507372f0b1b963James Dong
1610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// tC = tC0
1620c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// if (ap < beta) tC++;
1630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// if (aq < beta) tC++;
1640c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t5, filt, m01    ;// GE = (filt >= 1); t5 value is scratch
1650c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     tC0, tC0, m00    ;// tC0 = GE ? tC0 : 0
1660c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQADD8  tC, tC0, apflg    ;// tC = sat(tC0 + apflg)
1670c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB8   t1, t1, m00    ;// t1 unchanged (m00 = 0); done to set GE = (t1 >= 0)
1680c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQADD8  tC, tC, aqflg    ;// tC = sat(tC + aqflg); does not alter GE
1690c1bc742181ded4930842b46e9507372f0b1b963James Dong
1700c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Split into positive and negative part and clip
1710c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     pos, t1, m00    ;// pos = GE ? t1 : 0; overwrites m00 (same register)
1720c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   neg, pos, t1    ;// neg = pos - t1, i.e. -t1 where t1 < 0, else 0
1730c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t3, pos, tC    ;// GE = (pos >= tC); t3 is scratch and clobbers m01
1740c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     pos, tC, pos    ;// pos = min(pos, tC)
1750c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t3, neg, tC    ;// GE = (neg >= tC)
1760c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     neg, tC, neg    ;// neg = min(neg, tC)
1770c1bc742181ded4930842b46e9507372f0b1b963James Dong
1780c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Reload m01 (its register was used as scratch t3 above)
1790c1bc742181ded4930842b46e9507372f0b1b963James Dong        LDR     m01,=MASK_1
1800c1bc742181ded4930842b46e9507372f0b1b963James Dong
1810c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQADD8  P0a, p_0, pos    ;// P0a = sat(p0 + pos); overwrites tC (same register)
1820c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQSUB8  Q0a, q_0, pos    ;// Q0a = sat(q0 - pos); overwrites pos (same register)
1830c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQSUB8  P0a, P0a, neg    ;// P0a = sat(P0a - neg)
1840c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQADD8  Q0a, Q0a, neg    ;// Q0a = sat(Q0a + neg)
1850c1bc742181ded4930842b46e9507372f0b1b963James Dong
1860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Choose to store the filtered
1870c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// value or the original pixel
1880c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t1, filt, m01    ;// GE = (filt >= 1)
1890c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     P0a, P0a, p_0    ;// P0a = GE ? P0a : p0
1900c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     Q0a, Q0a, q_0    ;// Q0a = GE ? Q0a : q0
1910c1bc742181ded4930842b46e9507372f0b1b963James Dong
1920c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
1930c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// u1 = (p0 + q0 + 1)>>1
1940c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80
1950c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN     p_0, p_0    ;// p_0' = 255 - p0 (original p0 is not read again)
1960c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8  u1, q_0, p_0    ;// u1 = (q0 - p_0') >> 1; overwrites p_0 (same register)
1970c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQADD8  max, p_1, tC0    ;// max = sat(p1 + tC0)
1980c1bc742181ded4930842b46e9507372f0b1b963James Dong        EOR     u1, u1, m01 ,LSL #7    ;// flip bit 7 of each lane (m01 << 7 = 0x80808080)
1990c1bc742181ded4930842b46e9507372f0b1b963James Dong
2000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Calculate A = (p2+u1)>>1
2010c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Then delta = Clip3( -tC0, tC0, A - p1)
2020c1bc742181ded4930842b46e9507372f0b1b963James Dong
2030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Clip P1
2040c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8  P1a, p_2, u1    ;// P1a = (p2 + u1) >> 1; overwrites q_0 (same register)
2050c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQSUB8  min, p_1, tC0    ;// min = sat(p1 - tC0); overwrites filt (same register)
2060c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t4, P1a, max    ;// GE = (P1a >= max); t4 is scratch and clobbers p_2
2070c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     P1a, max, P1a    ;// P1a = min(P1a, max)
2080c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t4, P1a, min    ;// GE = (P1a >= min)
2090c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     P1a, P1a, min    ;// P1a = max(P1a, min)
2100c1bc742181ded4930842b46e9507372f0b1b963James Dong
2110c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Clip Q1
2120c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8  Q1a, q_2, u1    ;// Q1a = (q2 + u1) >> 1
2130c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQADD8  max, q_1, tC0    ;// max = sat(q1 + tC0)
2140c1bc742181ded4930842b46e9507372f0b1b963James Dong        UQSUB8  min, q_1, tC0    ;// min = sat(q1 - tC0)
2150c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t0, Q1a, max    ;// GE = (Q1a >= max); t0 is scratch and clobbers u1
2160c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     Q1a, max, Q1a    ;// Q1a = min(Q1a, max)
2170c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t0, Q1a, min    ;// GE = (Q1a >= min)
2180c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     Q1a, Q1a, min    ;// Q1a = max(Q1a, min)
2190c1bc742181ded4930842b46e9507372f0b1b963James Dong
2200c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Choose to store the filtered
2210c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// value or the original pixel
2220c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t0, apflg, m01    ;// GE = (apflg >= 1)
2230c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     P1a, P1a, p_1    ;// P1a = GE ? P1a : p1
2240c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t0, aqflg, m01    ;// GE = (aqflg >= 1)
2250c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     t3, Q1a, q_1    ;// r11 = GE ? Q1a : q1 (the Q1 output; overwrites m01)
2260c1bc742181ded4930842b46e9507372f0b1b963James Dong
2270c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END    ;// macro from armCOMM_s.h; presumably restores registers and returns - confirm
2280c1bc742181ded4930842b46e9507372f0b1b963James Dong
2290c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
2300c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
;// Filter kernel for the bSGE4 case. It works on four pixels at once, one per byte
;// lane, using the ARMv6 byte-wise SIMD instructions. USUB8 sets the per-lane GE
;// flags and the following SEL picks lane-by-lane between its two operands (first
;// operand where GE is set), so the relative order of those instructions must be
;// preserved. The arithmetic notes below are per byte lane.
;//
;// Flag encoding as used below: bit 0 of each apqflg lane selects the p-side
;// result; after the lane is shifted right by one, a non-zero lane selects the
;// q-side result.
;//
2310c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
2320c1bc742181ded4930842b46e9507372f0b1b963James Dong;//        - 2 - filt, 0 - apflg,aqflg
2330c1bc742181ded4930842b46e9507372f0b1b963James Dong;//        - 1 - ap0q0, 6 - alpha
2340c1bc742181ded4930842b46e9507372f0b1b963James Dong;//        - 7 - m00, 11 - m01
;//        - stack arguments pQ_3 / pP_3 (loaded with M_LDR into q_3b / p_3b)
2350c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
2360c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b)
2370c1bc742181ded4930842b46e9507372f0b1b963James Dong;//
2380c1bc742181ded4930842b46e9507372f0b1b963James Dong;// Registers Corrupted - 0-3,5-12,14
;// NOTE(review): r4 is also written below (as scratch t4) although it is not in
;// the list above - confirm that callers do not rely on r4 (p_2) surviving.
2390c1bc742181ded4930842b46e9507372f0b1b963James Dong
2400c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr    ;// macro from armCOMM_s.h; presumably saves registers up to lr - confirm
2410c1bc742181ded4930842b46e9507372f0b1b963James Dong
2420c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// apflg = apflg && |p0-q0|<((alpha>>2)+2)
2430c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// aqflg = aqflg && |p0-q0|<((alpha>>2)+2)
2440c1bc742181ded4930842b46e9507372f0b1b963James Dong
2450c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   pDummy,4    ;// stack argument declarations (armCOMM_s.h macro); presumably the
2460c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   pQ_3,4    ;// order and sizes must match what the caller pushed - confirm
2470c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_ARG   pP_3,4
2480c1bc742181ded4930842b46e9507372f0b1b963James Dong
2490c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8  alpha, alpha, m00    ;// alpha = alpha >> 1 (assumes m00 = 0 on entry)
2500c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t9, p_2, p_0    ;// t9 = dp2p0 = p2 - p0
2510c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8  alpha, alpha, m00    ;// alpha = original alpha >> 2
2520c1bc742181ded4930842b46e9507372f0b1b963James Dong        ADD     alpha, alpha, m01, LSL #1    ;// alpha += 2 in every lane (m01 << 1 = 0x02020202 when m01 = MASK_1)
2530c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   ap0q0, ap0q0, alpha    ;// GE = (ap0q0 >= (alpha>>2)+2); only the flags are used
2540c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     apqflg, m00, apflg    ;// apqflg = GE ? m00 : apflg (flags dropped where the test fails)
2550c1bc742181ded4930842b46e9507372f0b1b963James Dong
2560c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3
2570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3
2580c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3)
2590c1bc742181ded4930842b46e9507372f0b1b963James Dong
2600c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2
2610c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2)
2620c1bc742181ded4930842b46e9507372f0b1b963James Dong
2630c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3
2640c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3
2650c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = p0 + (((p3-p0) + (p2-p0) + t2 + 2)>>2)
2660c1bc742181ded4930842b46e9507372f0b1b963James Dong
2670c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Compute P0b
2680c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t2, p_0, q_0    ;// t2 = p0 - q0; overwrites m00 (same register)
2690c1bc742181ded4930842b46e9507372f0b1b963James Dong        SSUB8   t5, t9, t2    ;// t5 = (p2-p0) - (p0-q0)
2700c1bc742181ded4930842b46e9507372f0b1b963James Dong
2710c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t8, q_1, q_0    ;// t8 = q1 - q0; overwrites alpha (same register)
2720c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t8, t5, t8    ;// t8 = (t5 + t8) >> 1
2730c1bc742181ded4930842b46e9507372f0b1b963James Dong
2740c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t9, p_1, p_0    ;// t9 = p1 - p0
2750c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t8, t8, t9    ;// t8 = t8 + t9
2760c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHSUB8  t8, t8, t2    ;// t8 = (t8 - t2) >> 1
2770c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t5, t5, t9    ;// t5 = (t5 + t9) >> 1
2780c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t8, t8, m01    ;// t8 = (t8 + 1) >> 1
2790c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t9, t5, m01    ;// t9 = (t5 + 1) >> 1
2800c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   P0b, p_0, t8    ;// P0b = p0 + t8 (P0b and t8 share a register)
2810c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// P0b ready
2820c1bc742181ded4930842b46e9507372f0b1b963James Dong
2830c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Compute P1b
2840c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   p_3b, pP_3    ;// p_3b = stack argument pP_3; reuses r0 (apflg already folded into apqflg)
2850c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   P1b, p_0, t9    ;// P1b = p0 + t9
2860c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// P1b ready
2870c1bc742181ded4930842b46e9507372f0b1b963James Dong
2880c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Compute P2b
2890c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t9, p_2, p_0    ;// t9 = p2 - p0
2900c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t5, t5, t9    ;// t5 = t5 + t9
2910c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8  t9, p_3b, p_0    ;// t9 = (p3 - p0) >> 1
2920c1bc742181ded4930842b46e9507372f0b1b963James Dong        EOR     a, p_3b, p_0    ;// a = p3 ^ p0; overwrites p_3b (same register)
2930c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND     a, a, m01    ;// a = low bit of (p3 - p0)
2940c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t5, t5, a    ;// t5 = (t5 + a) >> 1
2950c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8  a, p_0, q_1    ;// a = (p0 + q1) >> 1, start of the alternative P0 value
2960c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t5, t5, m01    ;// t5 = t5 + 1
2970c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t5, t5, t9    ;// t5 = (t5 + t9) >> 1
2980c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN     t9, p_1    ;// t9 = 255 - p1
2990c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   P2b, p_0, t5    ;// P2b = p0 + t5 (P2b and t5 share a register)
3000c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// P2b ready
3010c1bc742181ded4930842b46e9507372f0b1b963James Dong
3020c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8  a, a, t9    ;// a = (a - (255 - p1)) >> 1
3030c1bc742181ded4930842b46e9507372f0b1b963James Dong        ORR     t9, apqflg, m01    ;// t9 = apqflg with bit 0 of each lane forced to 1
3040c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t9, apqflg, t9    ;// GE = (apqflg >= apqflg|1), i.e. bit 0 of the lane is set
3050c1bc742181ded4930842b46e9507372f0b1b963James Dong
3060c1bc742181ded4930842b46e9507372f0b1b963James Dong        EOR     a, a, m01, LSL #7    ;// flip bit 7: a = (((p0+q1)>>1) + p1 + 1) >> 1 = (2*p1 + p0 + q1 + 2)>>2
3070c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     P0b, P0b, a    ;// P0b = GE ? P0b : a
3080c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     P1b, P1b, p_1    ;// P1b = GE ? P1b : p1
3090c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     P2b, P2b, p_2    ;// P2b = GE ? P2b : p2
3100c1bc742181ded4930842b46e9507372f0b1b963James Dong
3110c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t4, filt, m01    ;// GE = (filt >= 1); t4 is scratch and clobbers p_2
3120c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     P0b, P0b, p_0    ;// P0b = GE ? P0b : p0
3130c1bc742181ded4930842b46e9507372f0b1b963James Dong
3140c1bc742181ded4930842b46e9507372f0b1b963James Dong
3150c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3
3160c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3
3170c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3)
3180c1bc742181ded4930842b46e9507372f0b1b963James Dong
3190c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2
3200c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2)
3210c1bc742181ded4930842b46e9507372f0b1b963James Dong
3220c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3
3230c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3
3240c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;//    = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2)
3250c1bc742181ded4930842b46e9507372f0b1b963James Dong
3260c1bc742181ded4930842b46e9507372f0b1b963James Dong
3270c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Compute Q0b Q1b
3280c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t4, q_2, q_0    ;// t4 = q2 - q0
3290c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   a, p_0, q_0    ;// a = p0 - q0
3300c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t9, p_1, p_0    ;// t9 = p1 - p0
3310c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t0, t4, a    ;// t0 = (q2-q0) + (p0-q0); overwrites p_0 (same register)
3320c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t9, t0, t9    ;// t9 = (t0 + t9) >> 1
3330c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8  t10, q_0, p_1    ;// t10 = (q0 + p1) >> 1; overwrites p_1 (same register)
3340c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t9, t9, a    ;// t9 = t9 + a
3350c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   a, q_1, q_0    ;// a = q1 - q0
3360c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t9, t9, a    ;// t9 = (t9 + a) >> 1
3370c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t0, t0, a    ;// t0 = (t0 + a) >> 1
3380c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t9, t9, m01    ;// t9 = (t9 + 1) >> 1
3390c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  a, t0, m01    ;// a = (t0 + 1) >> 1
3400c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t9, q_0, t9    ;// t9 = q0 + t9
3410c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Q0b ready - t9
3420c1bc742181ded4930842b46e9507372f0b1b963James Dong
3430c1bc742181ded4930842b46e9507372f0b1b963James Dong        MOV     t4, #0    ;// t4 = 0
3440c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHADD8  apqflg, apqflg, t4    ;// apqflg >>= 1 in every lane (drops bit 0)
3450c1bc742181ded4930842b46e9507372f0b1b963James Dong
3460c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   Q1b, q_0, a    ;// Q1b = q0 + a (Q1b and a share a register)
3470c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Q1b ready
3480c1bc742181ded4930842b46e9507372f0b1b963James Dong
3490c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t4, apqflg, m01    ;// GE = (shifted apqflg >= 1)
3500c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     Q1b, Q1b, q_1    ;// Q1b = GE ? Q1b : q1
3510c1bc742181ded4930842b46e9507372f0b1b963James Dong        MVN     t11, q_1    ;// t11 = 255 - q1; overwrites q_1 (same register)
3520c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8  t10, t10, t11    ;// t10 = (t10 - (255 - q1)) >> 1
3530c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_LDR   q_3b, pQ_3    ;// q_3b = stack argument pQ_3; reuses the q_1 / t11 register
3540c1bc742181ded4930842b46e9507372f0b1b963James Dong        EOR     t10, t10, m01, LSL #7    ;// flip bit 7: t10 = (((q0+p1)>>1) + q1 + 1) >> 1 = (2*q1 + q0 + p1 + 2)>>2
3550c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     t9, t9, t10    ;// t9 = GE ? t9 : t10 (GE still from the apqflg test above)
3560c1bc742181ded4930842b46e9507372f0b1b963James Dong
3570c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Compute Q2b
3580c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t4, q_2, q_0    ;// t4 = q2 - q0
3590c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t4, t0, t4    ;// t4 = t0 + t4
3600c1bc742181ded4930842b46e9507372f0b1b963James Dong        EOR     t0, q_3b, q_0    ;// t0 = q3 ^ q0
3610c1bc742181ded4930842b46e9507372f0b1b963James Dong        AND     t0, t0, m01    ;// t0 = low bit of (q3 - q0)
3620c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t4, t4, t0    ;// t4 = (t4 + t0) >> 1
3630c1bc742181ded4930842b46e9507372f0b1b963James Dong        UHSUB8  t10, q_3b, q_0    ;// t10 = (q3 - q0) >> 1
3640c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t4, t4, m01    ;// t4 = t4 + 1
3650c1bc742181ded4930842b46e9507372f0b1b963James Dong        SHADD8  t4, t4, t10    ;// t4 = (t4 + t10) >> 1
3660c1bc742181ded4930842b46e9507372f0b1b963James Dong
3670c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t10, filt, m01    ;// GE = (filt >= 1)
3680c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     Q0b, t9, q_0    ;// Q0b = GE ? t9 : q0; overwrites q_3b (same register)
3690c1bc742181ded4930842b46e9507372f0b1b963James Dong
3700c1bc742181ded4930842b46e9507372f0b1b963James Dong        SADD8   t4, q_0, t4    ;// t4 = q0 + t4
3710c1bc742181ded4930842b46e9507372f0b1b963James Dong        ;// Q2b ready - t4
3720c1bc742181ded4930842b46e9507372f0b1b963James Dong
3730c1bc742181ded4930842b46e9507372f0b1b963James Dong        USUB8   t10, apqflg, m01    ;// GE = (shifted apqflg >= 1)
3740c1bc742181ded4930842b46e9507372f0b1b963James Dong        SEL     Q2b, t4, q_2    ;// Q2b = GE ? t4 : q2; overwrites filt (same register)
3750c1bc742181ded4930842b46e9507372f0b1b963James Dong
3760c1bc742181ded4930842b46e9507372f0b1b963James Dong        M_END    ;// macro from armCOMM_s.h; presumably restores registers and returns - confirm
3770c1bc742181ded4930842b46e9507372f0b1b963James Dong
3780c1bc742181ded4930842b46e9507372f0b1b963James Dong    ENDIF
3790c1bc742181ded4930842b46e9507372f0b1b963James Dong
38078e52bfac041d71ce53b5b13c2abf78af742b09dLajos Molnar        END
381