armVCM4P10_Average_4x_Align_unsafe_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;//
17;//
18;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
19;// OpenMAX DL: v1.0.2
20;// Revision:   9641
21;// Date:       Thursday, February 7, 2008
22;//
23;//
24;//
25;//
26
27
28;// Functions:
29;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
30;//
31;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
32;// The first operand is at offset ALIGNMENT from an aligned address.
33;// The second operand is at an aligned location, which is also used as the output.
34;// destination pointed by (pDst) for vertical interpolation.
35;// This function needs to copy 4 bytes in horizontal direction
36;//
37;// Registers used as input for this function
38;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 its step size
39;//
40;// Registers preserved for top level function
41;// r4,r5,r6,r8,r9,r14
42;//
43;// Registers modified by the function
44;// r7,r10,r11,r12
45;//
46;// Output registers
47;// r2 - pointer to the aligned location
48;// r3 - step size to this aligned location
49
50        INCLUDE omxtypes_s.h
51        INCLUDE armCOMM_s.h
52
53        M_VARIANTS ARM1136JS
54        ;// M_VARIANTS is not defined in this file (presumably a macro from the included headers - confirm); all code below sits inside IF ARM1136JS.
55        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
56        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
57        EXPORT armVCM4P10_Average_4x4_Align3_unsafe
58        ;// Only the Align0, Align2 and Align3 entry points are exported here; no Align1 variant appears in this source.
59DEBUG_ON    SETL {FALSE}      ;// not referenced again in this file; presumably read by the included macro headers - TODO confirm
60
61;// Declare input registers
62pPred0          RN 0          ;// first operand pointer; advanced by iPredStep0 on every row load
63iPredStep0      RN 1          ;// amount added to pPred0 after each row
64pPred1          RN 2          ;// second operand pointer
65iPredStep1      RN 3          ;// row offset used when reading through pPred1
66pDstPred        RN 2          ;// same register as pPred1: results are written over the second operand
67iDstStep        RN 3          ;// same register as iPredStep1
68
69;// Declare other intermediate registers
70iPredA0         RN 10         ;// first-operand word for the even row of a row pair
71iPredA1         RN 11         ;// first-operand word for the odd row of a row pair
72iPredB0         RN 12         ;// second-operand word (even row); complemented in place before use
73iPredB1         RN 14         ;// second-operand word (odd row); note this is r14 (lr) used as data
74Temp1           RN 4          ;// word at [pPred0, #4] for the even row (Align2/Align3 only)
75Temp2           RN 5          ;// word at [pPred0, #4] for the odd row (Align2/Align3 only)
76ResultA         RN 5          ;// shares r5 with Temp2: Temp2 must be consumed before ResultA is written
77ResultB         RN 4          ;// shares r4 with Temp1: Temp1 must be consumed before ResultB is written
78r0x80808080     RN 7          ;// holds the constant 0x80808080 (0x80 in every byte lane)
79
79
80    IF ARM1136JS
81
82        ;// Averages four rows of 4 bytes: each byte becomes (a+b+1)>>1, a from pPred0, b from pPred1.
83        ;// pPred0 is at alignment offset 0 (its words are used as loaded) and pPred1 is alignment 4.
84        ;// Results are stored through pDstPred, which is the same register (r2) as pPred1, so the second operand is overwritten in place.
85        ;// Function header (M_START/M_END come from the included headers; presumably they save/restore r4-r6 and r14 as the file header lists - confirm)
86        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
87
88        ;// Code start: load the per-byte 0x80 constant used for the "+128" step below
89        LDR         r0x80808080, =0x80808080
90
91        ;// 1st load: rows 0 and 1. Both B rows are read before any store, because the stores reuse and advance r2.
92        M_LDR       iPredB0, [pPred1]
93        M_LDR       iPredA0, [pPred0], iPredStep0
94        M_LDR       iPredB1, [pPred1, iPredStep1]
95        M_LDR       iPredA1, [pPred0], iPredStep0
96
97        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
98        MVN         iPredB0, iPredB0                    ;// per byte: 255-b
99        MVN         iPredB1, iPredB1
100        UHSUB8      ResultA, iPredA0, iPredB0           ;// per byte: (a-(255-b))/2
101        UHSUB8      ResultB, iPredA1, iPredB1
102        EOR         ResultA, ResultA, r0x80808080       ;// per byte: toggle bit 7, i.e. the "+128" term modulo 256
103        M_STR       ResultA, [pDstPred], iDstStep
104        EOR         ResultB, ResultB, r0x80808080
105        M_STR       ResultB, [pDstPred], iDstStep
106
107        ;// 2nd load: rows 2 and 3, same scheme; r2 was advanced past rows 0-1 by the two stores above
108        M_LDR       iPredA0, [pPred0], iPredStep0
109        M_LDR       iPredB0, [pPred1]
110        M_LDR       iPredA1, [pPred0], iPredStep0
111        M_LDR       iPredB1, [pPred1, iPredStep1]
112
113        MVN         iPredB0, iPredB0
114        UHSUB8      ResultA, iPredA0, iPredB0
115        MVN         iPredB1, iPredB1
116        UHSUB8      ResultB, iPredA1, iPredB1
117        EOR         ResultA, ResultA, r0x80808080
118        M_STR       ResultA, [pDstPred], iDstStep
119        EOR         ResultB, ResultB, r0x80808080
120        M_STR       ResultB, [pDstPred], iDstStep
121End0
122        M_END
123
124        ;// Averages four rows of 4 bytes: each byte becomes (a+b+1)>>1, a from pPred0, b from pPred1.
125        ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4: each A row is rebuilt from the words at [pPred0] and [pPred0, #4].
126        ;// Results are stored through pDstPred, which is the same register (r2) as pPred1, so the second operand is overwritten in place.
127        ;// Function header (M_START/M_END come from the included headers; presumably they save/restore r4-r6 and r14 as the file header lists - confirm)
128        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
129
130        ;// Code start: load the per-byte 0x80 constant used for the "+128" step below
131        LDR         r0x80808080, =0x80808080
132
133        ;// 1st load: rows 0 and 1. The #4 word is read before the post-indexed load moves pPred0 to the next row.
134        LDR         Temp1, [pPred0, #4]
135        M_LDR       iPredA0, [pPred0], iPredStep0
136        M_LDR       iPredB0, [pPred1]
137        M_LDR       iPredB1, [pPred1, iPredStep1]
138        M_LDR       Temp2, [pPred0, #4]
139        M_LDR       iPredA1, [pPred0], iPredStep0
140        MVN         iPredB0, iPredB0                    ;// per byte: 255-b
141        MVN         iPredB1, iPredB1
142        MOV         iPredA0, iPredA0, LSR #16           ;// upper half of the first word moves to the low half
143        ORR         iPredA0, iPredA0, Temp1, LSL #16    ;// lower half of the next word fills the high half (memory bytes 2..5 assuming little-endian loads - confirm)
144        MOV         iPredA1, iPredA1, LSR #16
145        ORR         iPredA1, iPredA1, Temp2, LSL #16    ;// Temp1/Temp2 are dead from here on, so r4/r5 can be reused as ResultB/ResultA
146
147        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
148        UHSUB8      ResultA, iPredA0, iPredB0           ;// per byte: (a-(255-b))/2
149        UHSUB8      ResultB, iPredA1, iPredB1
150        EOR         ResultA, ResultA, r0x80808080       ;// per byte: toggle bit 7, i.e. the "+128" term modulo 256
151        M_STR       ResultA, [pDstPred], iDstStep
152        EOR         ResultB, ResultB, r0x80808080
153        M_STR       ResultB, [pDstPred], iDstStep
154
155        ;// 2nd load: rows 2 and 3, same scheme; r2 was advanced past rows 0-1 by the two stores above
156        LDR         Temp1, [pPred0, #4]
157        M_LDR         iPredA0, [pPred0], iPredStep0
158        LDR         iPredB0, [pPred1]
159        LDR         iPredB1, [pPred1, iPredStep1]
160        LDR         Temp2, [pPred0, #4]
161        M_LDR         iPredA1, [pPred0], iPredStep0
162        MVN         iPredB0, iPredB0
163        MVN         iPredB1, iPredB1
164        MOV         iPredA0, iPredA0, LSR #16
165        ORR         iPredA0, iPredA0, Temp1, LSL #16
166        MOV         iPredA1, iPredA1, LSR #16
167        ORR         iPredA1, iPredA1, Temp2, LSL #16
168
169        UHSUB8      ResultA, iPredA0, iPredB0
170        UHSUB8      ResultB, iPredA1, iPredB1
171        EOR         ResultA, ResultA, r0x80808080
172        M_STR       ResultA, [pDstPred], iDstStep
173        EOR         ResultB, ResultB, r0x80808080
174        M_STR       ResultB, [pDstPred], iDstStep
175End2
176        M_END
177
178
179        ;// Averages four rows of 4 bytes: each byte becomes (a+b+1)>>1, a from pPred0, b from pPred1.
180        ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4: each A row is rebuilt from the words at [pPred0] and [pPred0, #4].
181        ;// Results are stored through pDstPred, which is the same register (r2) as pPred1, so the second operand is overwritten in place.
182        ;// Function header (M_START/M_END come from the included headers; presumably they save/restore r4-r6 and r14 as the file header lists - confirm)
183        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
184
185        ;// Code start: load the per-byte 0x80 constant used for the "+128" step below
186        LDR         r0x80808080, =0x80808080
187
188        ;// 1st load: rows 0 and 1. The #4 word is read before the post-indexed load moves pPred0 to the next row.
189        LDR         Temp1, [pPred0, #4]
190        M_LDR       iPredA0, [pPred0], iPredStep0
191        LDR         iPredB0, [pPred1]
192        LDR         iPredB1, [pPred1, iPredStep1]
193        LDR         Temp2, [pPred0, #4]
194        M_LDR       iPredA1, [pPred0], iPredStep0
195        ;// Formula used below: (a+b+1)/2 = (a-(255-b))/2 + 128, evaluated per byte lane
196        MVN         iPredB0, iPredB0                    ;// per byte: 255-b
197        MVN         iPredB1, iPredB1
198        MOV         iPredA0, iPredA0, LSR #24           ;// top byte of the first word moves to the lowest lane
199        ORR         iPredA0, iPredA0, Temp1, LSL #8     ;// low three bytes of the next word fill the upper lanes (memory bytes 3..6 assuming little-endian loads - confirm)
200        MOV         iPredA1, iPredA1, LSR #24
201        ORR         iPredA1, iPredA1, Temp2, LSL #8     ;// Temp1/Temp2 are dead from here on, so r4/r5 can be reused as ResultB/ResultA
202        UHSUB8      ResultA, iPredA0, iPredB0           ;// per byte: (a-(255-b))/2
203        UHSUB8      ResultB, iPredA1, iPredB1
204        EOR         ResultA, ResultA, r0x80808080       ;// per byte: toggle bit 7, i.e. the "+128" term modulo 256
205        M_STR       ResultA, [pDstPred], iDstStep
206        EOR         ResultB, ResultB, r0x80808080
207        M_STR       ResultB, [pDstPred], iDstStep
208
209        ;// 2nd load: rows 2 and 3, same scheme; r2 was advanced past rows 0-1 by the two stores above
210        LDR         Temp1, [pPred0, #4]
211        M_LDR       iPredA0, [pPred0], iPredStep0
212        LDR         iPredB0, [pPred1]
213        LDR         iPredB1, [pPred1, iPredStep1]
214        LDR         Temp2, [pPred0, #4]
215        M_LDR       iPredA1, [pPred0], iPredStep0
216
217        MVN         iPredB0, iPredB0
218        MVN         iPredB1, iPredB1
219        MOV         iPredA0, iPredA0, LSR #24
220        ORR         iPredA0, iPredA0, Temp1, LSL #8
221        MOV         iPredA1, iPredA1, LSR #24
222        ORR         iPredA1, iPredA1, Temp2, LSL #8
223
224        UHSUB8      ResultA, iPredA0, iPredB0
225        UHSUB8      ResultB, iPredA1, iPredB1
226        EOR         ResultA, ResultA, r0x80808080
227        M_STR       ResultA, [pDstPred], iDstStep
228        EOR         ResultB, ResultB, r0x80808080
229        M_STR       ResultB, [pDstPred], iDstStep
230End3
231        M_END
232
233    ENDIF
234
235    END
236
237