;//
;//
;// File Name:  armVCM4P10_DeblockingLuma_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

13bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        INCLUDE omxtypes_s.h
14bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        INCLUDE armCOMM_s.h
15bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
16bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_VARIANTS ARM1136JS
17bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
18bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
19bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
20bebc99d6fa433c04139294a5057f8439d772dbd9James Dong    IF  ARM1136JS
21bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
22bebc99d6fa433c04139294a5057f8439d772dbd9James DongMASK_1  EQU 0x01010101
23bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
;// Register aliases (RN) used by the kernels in this file.
;//
;// NOTE: many aliases share one physical register (for example beta, bS,
;// tC0 and t9 are all r14; m01 and t3 are both r11; m00, pos, Q0a, P1b and
;// t2 are all r7).  An alias is therefore only meaningful while the value it
;// names is live, and writing through one alias destroys whatever the other
;// aliases of that register held.

;// Declare input registers

pQ0        RN 0
StepArg    RN 1
tC0Arg     RN 2
alpha      RN 6

beta       RN 14
bS         RN 14
tC0        RN 14
ptC0       RN 1

;// Declare Local/Temporary variables

;// Pixels
p_0     RN 3
p_1     RN 5
p_2     RN 4
p_3     RN 2
q_0     RN 8
q_1     RN 9
q_2     RN 10
q_3     RN 12


;// Filtering

ap0q0   RN 1
filt    RN 2

;// m00 / m01: per-lane constants (m01 is loaded from MASK_1)
m00     RN 7
m01     RN 11

apflg   RN 0
aqflg   RN 6

tC      RN 1


;//Declarations for bSLT4 kernel

pos     RN 7
neg     RN 12

P0a     RN 1
P1a     RN 8
Q0a     RN 7
Q1a     RN 4

u1      RN 3
max     RN 12
min     RN 2



;//Declarations for bSGE4 kernel

q_3b    RN 9
p_3b    RN 0
apqflg  RN 12

P0b     RN 6
P1b     RN 7
P2b     RN 1

Q0b     RN 9
Q1b     RN 0
Q2b     RN 2

;// Miscellaneous

a       RN 0
t0      RN 3
t1      RN 12
t2      RN 7
t3      RN 11
t4      RN 4
t5      RN 1
t8      RN 6
t9      RN 14
t10     RN 5
t11     RN 9

;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe()
;//
;// Luma deblocking kernel for the bSLT4 (presumably bS < 4) case.  All
;// arithmetic is byte-wise SIMD, so each 32-bit register carries four
;// independent 8-bit lanes.
;//
;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
;//        - 2 - filt, 0 - apflg, 6 - aqflg
;//        - 11 - m01, 7 - tC0
;//
;// Outputs - 1,8,7,11 - Output Pixels(P0a,P1a,Q0a,Q1a)
;//           (Q1a is clipped in r4; the final SEL writes the result to r11)
;//
;// Registers Corrupted - 0-3,5-12,14
;// NOTE(review): r4 is also written below (through the t4 and Q1a aliases)
;// although it is missing from the list above - confirm that no caller
;// relies on r4 being preserved.
;//
;// Conventions used in the comments below:
;//   USUB8 a,b sets GE[lane] when a >= b (unsigned); SSUB8/SADD8 set
;//   GE[lane] when the signed result is >= 0; SEL d,x,y picks x where
;//   GE[lane] is set and y otherwise.  Many USUB8 results are discarded:
;//   the instruction is used only to set the GE flags for the next SEL.


        M_START armVCM4P10_DeblockingLumabSLT4_unsafe, lr

        ;// Since beta <= 18 and alpha <= 255 we know
        ;// -254 <= p0-q0 <= 254
        ;//  -17 <= q1-q0 <= 17
        ;//  -17 <= p1-p0 <= 17

        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
        ;//
        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
        ;//
        ;// A is accumulated in t1 using halving subtracts.  The low bit of
        ;// (p0-q0) is kept in t2 and (p0-q0)>>1 in t5, and each is folded
        ;// in separately.

        USUB8   t1, p_1, p_0            ;// t1 = p1-p0
        ;// t2 is r7 here, i.e. the tC0 input.  Multiplying by m01 (0x01 in
        ;// every lane) copies it into all four lanes, assuming the input is
        ;// a single byte value - TODO confirm against the caller.
        MUL     tC0, t2, m01

        USUB8   t2, q_1, q_0            ;// t2 = q1-q0
        SSUB8   t1, t1, t2              ;// t1 = (p1-p0) - (q1-q0)

        USUB8   t2, p_0, q_0            ;// t2 = p0-q0 (mod 256)
        AND     t2, t2, m01             ;// t2 = low bit of (p0-q0)
        SHSUB8  t1, t1, t2              ;// t1 = (t1 - t2)>>1
        UHSUB8  t5, p_0, q_0            ;// t5 = (p0-q0)>>1
        SSUB8   t1, t1, t2              ;// t1 = t1 - t2
        SHSUB8  t1, t1, t5              ;// t1 = (t1 - t5)>>1
        MOV     m00, #0                 ;// m00 = 0 in every lane
        SADD8   t1, t1, m01             ;// t1 = t1 + 1 (rounding term)
        SHSUB8  t1, t1, t5              ;// t1 = (t1 - t5)>>1  -> A

        ;// tC = tC0
        ;// if (ap < beta) tC++;
        ;// if (aq < beta) tC++;
        USUB8   t5, filt, m01           ;// GE = (filt >= 1); t5 is scratch
        SEL     tC0, tC0, m00           ;// tC0 = filt ? tC0 : 0
        UQADD8  tC, tC0, apflg          ;// tC = tC0 + apflg (saturating)
        SSUB8   t1, t1, m00             ;// t1 unchanged; GE = (A >= 0)
        UQADD8  tC, tC, aqflg           ;// tC += aqflg (GE is left intact)

        ;// Split into positive and negative part and clip
        ;// (pos shares r7 with m00, so m00 is no longer zero after this;
        ;//  t3 shares r11 with m01, so the mask is destroyed and reloaded)
        SEL     pos, t1, m00            ;// pos = (A >= 0) ? A : 0
        USUB8   neg, pos, t1            ;// neg = pos - A = (A < 0) ? -A : 0
        USUB8   t3, pos, tC             ;// GE = (pos >= tC)
        SEL     pos, tC, pos            ;// pos = min(pos, tC)
        USUB8   t3, neg, tC             ;// GE = (neg >= tC)
        SEL     neg, tC, neg            ;// neg = min(neg, tC)

        ;//Reload m01
        LDR     m01,=MASK_1

        ;// Apply the clipped delta with unsigned saturation:
        ;// P0a = p0 + pos - neg, Q0a = q0 - pos + neg
        UQADD8  P0a, p_0, pos
        UQSUB8  Q0a, q_0, pos
        UQSUB8  P0a, P0a, neg
        UQADD8  Q0a, Q0a, neg

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t1, filt, m01           ;// GE = (filt >= 1); t1 is scratch
        SEL     P0a, P0a, p_0
        SEL     Q0a, Q0a, q_0

        ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
        ;// u1 = (p0 + q0 + 1)>>1
        ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80
        ;// (p_0' is the bitwise complement of p0; u1 then reuses r3, so
        ;//  the original p0 is gone from here on)
        MVN     p_0, p_0
        UHSUB8  u1, q_0, p_0
        UQADD8  max, p_1, tC0           ;// max = p1 + tC0 (saturating)
        EOR     u1, u1, m01 ,LSL #7     ;// flip bit 7 of every lane

        ;// Calculate A = (p2+u1)>>1
        ;// Then delta = Clip3( -tC0, tC0, A - p1)

        ;// Clip P1
        ;// (P1a reuses r8 = q_0, min reuses r2 = filt, t4 reuses r4 = p_2;
        ;//  each is overwritten only after its last read above)
        UHADD8  P1a, p_2, u1            ;// P1a = (p2 + u1)>>1
        UQSUB8  min, p_1, tC0           ;// min = p1 - tC0 (saturating)
        USUB8   t4, P1a, max            ;// GE = (P1a >= max)
        SEL     P1a, max, P1a           ;// P1a = min(P1a, max)
        USUB8   t4, P1a, min            ;// GE = (P1a >= min)
        SEL     P1a, P1a, min           ;// P1a = max(P1a, min)

        ;// Clip Q1
        UHADD8  Q1a, q_2, u1            ;// Q1a = (q2 + u1)>>1
        UQADD8  max, q_1, tC0           ;// max = q1 + tC0 (saturating)
        UQSUB8  min, q_1, tC0           ;// min = q1 - tC0 (saturating)
        USUB8   t0, Q1a, max            ;// GE = (Q1a >= max)
        SEL     Q1a, max, Q1a           ;// Q1a = min(Q1a, max)
        USUB8   t0, Q1a, min            ;// GE = (Q1a >= min)
        SEL     Q1a, Q1a, min           ;// Q1a = max(Q1a, min)

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t0, apflg, m01          ;// GE = (apflg >= 1)
        SEL     P1a, P1a, p_1
        USUB8   t0, aqflg, m01          ;// GE = (aqflg >= 1)
        SEL     t3, Q1a, q_1            ;// Q1 result is returned in r11 (t3)

        M_END

;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
;//
;// Luma deblocking kernel for the bSGE4 (presumably bS >= 4) case.  All
;// arithmetic is byte-wise SIMD, so each 32-bit register carries four
;// independent 8-bit lanes.
;//
;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
;//        - 2 - filt, 0 - apflg,aqflg
;//        - 1 - ap0q0, 6 - alpha
;//        - 7 - m00, 11 - m01
;//        - pQ_3, pP_3 - the q3 / p3 pixel words, fetched with M_LDR from
;//          the arguments declared with M_ARG below
;//
;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b)
;//
;// Registers Corrupted - 0-3,5-12,14
;// NOTE(review): r4 is also written below (through the t4 alias) although
;// it is missing from the list above - confirm that no caller relies on r4
;// being preserved.
;//
;// Conventions used in the comments below:
;//   USUB8 a,b sets GE[lane] when a >= b (unsigned); SSUB8/SADD8 set
;//   GE[lane] when the signed result is >= 0; SEL d,x,y picks x where
;//   GE[lane] is set and y otherwise.  Several USUB8 results are discarded:
;//   the instruction is used only to set the GE flags for the next SEL.

        M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr

        ;// apflg = apflg && |p0-q0|<((alpha>>2)+2)
        ;// aqflg = aqflg && |p0-q0|<((alpha>>2)+2)
        ;// Both flags arrive together in r0 and are gated together into
        ;// apqflg.  The tests further down read bit 0 of each lane for the
        ;// p side and the remaining bits (after a halving shift) for the q
        ;// side.

        ;// M_ARG / M_LDR are macros from the included headers; presumably
        ;// they name stack-passed arguments and load them - TODO confirm.
        M_ARG   pDummy,4
        M_ARG   pQ_3,4
        M_ARG   pP_3,4

        ;// m00 is expected to be zero on entry, so the two halving adds
        ;// give alpha>>2 in every lane.
        UHADD8  alpha, alpha, m00
        USUB8   t9, p_2, p_0    ;//t9 = dp2p0
        UHADD8  alpha, alpha, m00
        ADD     alpha, alpha, m01, LSL #1   ;// alpha = (alpha>>2) + 2
        USUB8   ap0q0, ap0q0, alpha         ;// GE = (ap0q0 >= (alpha>>2)+2)
        SEL     apqflg, m00, apflg          ;// apqflg = GE ? m00 : apflg

        ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3
        ;//    = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3
        ;//    = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3)

        ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2
        ;//    = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2)

        ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3
        ;//    = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3
        ;//    = p0 + (((p3-p0) + (p2-p0) + t2 + 2)>>2)

        ;// Compute P0b
        ;// (t2 reuses r7 = m00, t5 reuses r1 = ap0q0, t8 reuses r6 = alpha;
        ;//  those inputs are dead from here on)
        USUB8   t2, p_0, q_0            ;// t2 = p0-q0
        SSUB8   t5, t9, t2              ;// t5 = (p2-p0) - (p0-q0)

        USUB8   t8, q_1, q_0            ;// t8 = q1-q0
        SHADD8  t8, t5, t8              ;// t8 = (t5 + t8)>>1

        USUB8   t9, p_1, p_0            ;// t9 = p1-p0
        SADD8   t8, t8, t9              ;// t8 += (p1-p0)
        SHSUB8  t8, t8, t2              ;// t8 = (t8 - (p0-q0))>>1
        SHADD8  t5, t5, t9              ;// t5 = (t5 + (p1-p0))>>1
        SHADD8  t8, t8, m01             ;// t8 = (t8 + 1)>>1
        SHADD8  t9, t5, m01             ;// t9 = (t5 + 1)>>1
        SADD8   P0b, p_0, t8
        ;// P0b ready

        ;// Compute P1b
        ;// (the load overwrites r0; the flags already live on in apqflg)
        M_LDR   p_3b, pP_3
        SADD8   P1b, p_0, t9
        ;// P1b ready

        ;// Compute P2b
        USUB8   t9, p_2, p_0            ;// t9 = p2-p0
        SADD8   t5, t5, t9
        UHSUB8  t9, p_3b, p_0           ;// t9 = (p3-p0)>>1
        EOR     a, p_3b, p_0
        AND     a, a, m01               ;// a = low bit of (p3 ^ p0)
        SHADD8  t5, t5, a
        UHADD8  a, p_0, q_1             ;// a = (p0+q1)>>1, used below
        SADD8   t5, t5, m01
        SHADD8  t5, t5, t9
        MVN     t9, p_1                 ;// t9 = ~p1, used below
        SADD8   P2b, p_0, t5
        ;// P2b ready

        ;// Alternative P0 for lanes whose p-side flag is clear:
        ;// a = (((p0+q1)>>1) - ~p1)>>1 with bit 7 flipped, the same
        ;// complement-and-flip rounding form used for the averages above.
        UHSUB8  a, a, t9
        ORR     t9, apqflg, m01
        USUB8   t9, apqflg, t9          ;// GE = bit 0 of apqflg lane is set

        EOR     a, a, m01, LSL #7
        SEL     P0b, P0b, a
        SEL     P1b, P1b, p_1
        SEL     P2b, P2b, p_2

        USUB8   t4, filt, m01           ;// GE = (filt >= 1); t4 is scratch
        SEL     P0b, P0b, p_0


        ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3
        ;//    = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3
        ;//    = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3)

        ;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2
        ;//    = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2)

        ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3
        ;//    = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3
        ;//    = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2)


        ;// Compute Q0b Q1b
        ;// (t0 reuses r3 = p_0 and t10 reuses r5 = p_1; each is written
        ;//  only after the last read of the pixel it replaces)
        USUB8   t4, q_2, q_0            ;// t4 = q2-q0
        USUB8   a, p_0, q_0             ;// a  = p0-q0
        USUB8   t9, p_1, p_0            ;// t9 = p1-p0
        SADD8   t0, t4, a               ;// t0 = (q2-q0) + (p0-q0)
        SHADD8  t9, t0, t9
        UHADD8  t10, q_0, p_1           ;// t10 = (q0+p1)>>1, used below
        SADD8   t9, t9, a
        USUB8   a, q_1, q_0             ;// a = q1-q0
        SHADD8  t9, t9, a
        SHADD8  t0, t0, a
        SHADD8  t9, t9, m01
        SHADD8  a, t0, m01
        SADD8   t9, q_0, t9
        ;// Q0b ready - t9

        ;// Halve apqflg in every lane (shift right by one bit) so that the
        ;// ">= 1" tests below look at the bits above bit 0.
        MOV     t4, #0
        UHADD8  apqflg, apqflg, t4

        SADD8   Q1b, q_0, a
        ;// Q1b ready

        USUB8   t4, apqflg, m01         ;// GE = (apqflg >= 1)
        SEL     Q1b, Q1b, q_1
        ;// t11 and q_3b both reuse r9 = q_1, which is dead after the SEL
        ;// above.  The SEL at the end of this group still uses the GE
        ;// flags from the USUB8 above, assuming M_LDR leaves them intact.
        MVN     t11, q_1
        UHSUB8  t10, t10, t11
        M_LDR   q_3b, pQ_3
        EOR     t10, t10, m01, LSL #7
        SEL     t9, t9, t10

        ;// Compute Q2b
        USUB8   t4, q_2, q_0            ;// t4 = q2-q0
        SADD8   t4, t0, t4
        EOR     t0, q_3b, q_0
        AND     t0, t0, m01             ;// t0 = low bit of (q3 ^ q0)
        SHADD8  t4, t4, t0
        UHSUB8  t10, q_3b, q_0          ;// t10 = (q3-q0)>>1
        SADD8   t4, t4, m01
        SHADD8  t4, t4, t10

        USUB8   t10, filt, m01          ;// GE = (filt >= 1); t10 is scratch
        SEL     Q0b, t9, q_0

        SADD8   t4, q_0, t4
        ;// Q2b ready - t4

        USUB8   t10, apqflg, m01        ;// GE = (apqflg >= 1)
        SEL     Q2b, t4, q_2            ;// Q2b reuses r2 = filt

        M_END

364bebc99d6fa433c04139294a5057f8439d772dbd9James Dong    ENDIF
365bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
366bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        END