;//
;//
;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//


;// Functions:
;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe   (ALIGNMENT = 0, 2 or 3)
;//
;// Implements the average of two 4x4 blocks with the equation c = (a+b+1)>>1.
;// The first operand is at offset ALIGNMENT from an aligned address.
;// The second operand is at an aligned location and is also used as the output:
;// the destination (pDst) is the second operand's buffer, so each averaged row
;// overwrites the row it was computed from.
;// Each row is 4 bytes in the horizontal direction and is handled as one word.
;//
;// Registers used as input for this function
;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 its step size
;//
;// Registers preserved for top level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;//
;// Output registers
;// r2 - pointer to the aligned location
;// r3 - step size to this aligned location

36bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        INCLUDE omxtypes_s.h
37bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        INCLUDE armCOMM_s.h
38bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
39bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_VARIANTS ARM1136JS
40bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
41bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
42bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
43bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EXPORT armVCM4P10_Average_4x4_Align3_unsafe
44bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
45bebc99d6fa433c04139294a5057f8439d772dbd9James DongDEBUG_ON    SETL {FALSE}
46bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
47bebc99d6fa433c04139294a5057f8439d772dbd9James Dong;// Declare input registers
48bebc99d6fa433c04139294a5057f8439d772dbd9James DongpPred0          RN 0
49bebc99d6fa433c04139294a5057f8439d772dbd9James DongiPredStep0      RN 1
50bebc99d6fa433c04139294a5057f8439d772dbd9James DongpPred1          RN 2
51bebc99d6fa433c04139294a5057f8439d772dbd9James DongiPredStep1      RN 3
52bebc99d6fa433c04139294a5057f8439d772dbd9James DongpDstPred        RN 2
53bebc99d6fa433c04139294a5057f8439d772dbd9James DongiDstStep        RN 3
54bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
55bebc99d6fa433c04139294a5057f8439d772dbd9James Dong;// Declare other intermediate registers
56bebc99d6fa433c04139294a5057f8439d772dbd9James DongiPredA0         RN 10
57bebc99d6fa433c04139294a5057f8439d772dbd9James DongiPredA1         RN 11
58bebc99d6fa433c04139294a5057f8439d772dbd9James DongiPredB0         RN 12
59bebc99d6fa433c04139294a5057f8439d772dbd9James DongiPredB1         RN 14
60bebc99d6fa433c04139294a5057f8439d772dbd9James DongTemp1           RN 4
61bebc99d6fa433c04139294a5057f8439d772dbd9James DongTemp2           RN 5
62bebc99d6fa433c04139294a5057f8439d772dbd9James DongResultA         RN 5
63bebc99d6fa433c04139294a5057f8439d772dbd9James DongResultB         RN 4
64bebc99d6fa433c04139294a5057f8439d772dbd9James Dongr0x80808080     RN 7
65bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
66bebc99d6fa433c04139294a5057f8439d772dbd9James Dong    IF ARM1136JS
67bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
68bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// This function calculates average of 4x4 block
69bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4
70bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
71bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// Function header
72bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
73bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
74bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// Code start
75bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         r0x80808080, =0x80808080
76bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
77bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// 1st load
78bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredB0, [pPred1]
79bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA0, [pPred0], iPredStep0
80bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredB1, [pPred1, iPredStep1]
81bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA1, [pPred0], iPredStep0
82bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
83bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
84bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB0, iPredB0
85bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB1, iPredB1
86bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultA, iPredA0, iPredB0
87bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultB, iPredA1, iPredB1
88bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultA, ResultA, r0x80808080
89bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultA, [pDstPred], iDstStep
90bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultB, ResultB, r0x80808080
91bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultB, [pDstPred], iDstStep
92bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
93bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// 2nd load
94bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA0, [pPred0], iPredStep0
95bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredB0, [pPred1]
96bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA1, [pPred0], iPredStep0
97bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredB1, [pPred1, iPredStep1]
98bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
99bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB0, iPredB0
100bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultA, iPredA0, iPredB0
101bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB1, iPredB1
102bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultB, iPredA1, iPredB1
103bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultA, ResultA, r0x80808080
104bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultA, [pDstPred], iDstStep
105bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultB, ResultB, r0x80808080
106bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultB, [pDstPred], iDstStep
107bebc99d6fa433c04139294a5057f8439d772dbd9James DongEnd0
108bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_END
109bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
110bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// This function calculates average of 4x4 block
111bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
112bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
113bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// Function header
114bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
115bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
116bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// Code start
117bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         r0x80808080, =0x80808080
118bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
119bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// 1st load
120bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         Temp1, [pPred0, #4]
121bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA0, [pPred0], iPredStep0
122bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredB0, [pPred1]
123bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredB1, [pPred1, iPredStep1]
124bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       Temp2, [pPred0, #4]
125bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA1, [pPred0], iPredStep0
126bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB0, iPredB0
127bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB1, iPredB1
128bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MOV         iPredA0, iPredA0, LSR #16
129bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ORR         iPredA0, iPredA0, Temp1, LSL #16
130bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MOV         iPredA1, iPredA1, LSR #16
131bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ORR         iPredA1, iPredA1, Temp2, LSL #16
132bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
133bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
134bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultA, iPredA0, iPredB0
135bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultB, iPredA1, iPredB1
136bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultA, ResultA, r0x80808080
137bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultA, [pDstPred], iDstStep
138bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultB, ResultB, r0x80808080
139bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultB, [pDstPred], iDstStep
140bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
141bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// 2nd load
142bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         Temp1, [pPred0, #4]
143bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR         iPredA0, [pPred0], iPredStep0
144bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         iPredB0, [pPred1]
145bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         iPredB1, [pPred1, iPredStep1]
146bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         Temp2, [pPred0, #4]
147bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR         iPredA1, [pPred0], iPredStep0
148bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB0, iPredB0
149bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB1, iPredB1
150bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MOV         iPredA0, iPredA0, LSR #16
151bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ORR         iPredA0, iPredA0, Temp1, LSL #16
152bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MOV         iPredA1, iPredA1, LSR #16
153bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ORR         iPredA1, iPredA1, Temp2, LSL #16
154bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
155bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultA, iPredA0, iPredB0
156bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultB, iPredA1, iPredB1
157bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultA, ResultA, r0x80808080
158bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultA, [pDstPred], iDstStep
159bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultB, ResultB, r0x80808080
160bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultB, [pDstPred], iDstStep
161bebc99d6fa433c04139294a5057f8439d772dbd9James DongEnd2
162bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_END
163bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
164bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
165bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// This function calculates average of 4x4 block
166bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
167bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
168bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// Function header
169bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
170bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
171bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// Code start
172bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         r0x80808080, =0x80808080
173bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
174bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// 1st load
175bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         Temp1, [pPred0, #4]
176bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA0, [pPred0], iPredStep0
177bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         iPredB0, [pPred1]
178bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         iPredB1, [pPred1, iPredStep1]
179bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         Temp2, [pPred0, #4]
180bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA1, [pPred0], iPredStep0
181bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
182bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB0, iPredB0
183bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB1, iPredB1
184bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MOV         iPredA0, iPredA0, LSR #24
185bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ORR         iPredA0, iPredA0, Temp1, LSL #8
186bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MOV         iPredA1, iPredA1, LSR #24
187bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ORR         iPredA1, iPredA1, Temp2, LSL #8
188bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultA, iPredA0, iPredB0
189bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultB, iPredA1, iPredB1
190bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultA, ResultA, r0x80808080
191bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultA, [pDstPred], iDstStep
192bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultB, ResultB, r0x80808080
193bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultB, [pDstPred], iDstStep
194bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
195bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ;// 2nd load
196bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         Temp1, [pPred0, #4]
197bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA0, [pPred0], iPredStep0
198bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         iPredB0, [pPred1]
199bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         iPredB1, [pPred1, iPredStep1]
200bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        LDR         Temp2, [pPred0, #4]
201bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_LDR       iPredA1, [pPred0], iPredStep0
202bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
203bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB0, iPredB0
204bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MVN         iPredB1, iPredB1
205bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MOV         iPredA0, iPredA0, LSR #24
206bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ORR         iPredA0, iPredA0, Temp1, LSL #8
207bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        MOV         iPredA1, iPredA1, LSR #24
208bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        ORR         iPredA1, iPredA1, Temp2, LSL #8
209bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
210bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultA, iPredA0, iPredB0
211bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        UHSUB8      ResultB, iPredA1, iPredB1
212bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultA, ResultA, r0x80808080
213bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultA, [pDstPred], iDstStep
214bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        EOR         ResultB, ResultB, r0x80808080
215bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_STR       ResultB, [pDstPred], iDstStep
216bebc99d6fa433c04139294a5057f8439d772dbd9James DongEnd3
217bebc99d6fa433c04139294a5057f8439d772dbd9James Dong        M_END
218bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
219bebc99d6fa433c04139294a5057f8439d772dbd9James Dong    ENDIF
220bebc99d6fa433c04139294a5057f8439d772dbd9James Dong
221bebc99d6fa433c04139294a5057f8439d772dbd9James Dong    END
222bebc99d6fa433c04139294a5057f8439d772dbd9James Dong