armVCM4P10_Average_4x_Align_unsafe_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
1;//
2;//
3;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   12290
6;// Date:       Wednesday, April 9, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13
14;// Functions:
15;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
16;//
17;// Implements the average of two 4x4 blocks with the equation c = (a+b+1)>>1.
18;// The first operand is at offset ALIGNMENT from an aligned address.
19;// The second operand is at an aligned location, which is also used as the output
20;// destination pointed to by (pDst) for vertical interpolation.
21;// This function needs to copy 4 bytes in horizontal direction
22;//
23;// Registers used as input for this function
24;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
25;//
26;// Registers preserved for top level function
27;// r4,r5,r6,r8,r9,r14
28;//
29;// Registers modified by the function
30;// r7,r10,r11,r12
31;//
32;// Output registers
33;// r2 - pointer to the aligned location
34;// r3 - step size to this aligned location
35
36        INCLUDE omxtypes_s.h
37        INCLUDE armCOMM_s.h
38
;// Build-variant selection, exported entry points and register aliases.
;// NOTE(review): M_VARIANTS is not defined in this file; presumably it comes from
;// one of the INCLUDEd headers and sets up the ARM1136JS symbol that the IF
;// further down tests - confirm.
39        M_VARIANTS ARM1136JS
40
;// Entry points made visible to the linker. Only the Align0, Align2 and Align3
;// variants are exported (and defined) in this file.
41        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
42        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
43        EXPORT armVCM4P10_Average_4x4_Align3_unsafe
44
;// DEBUG_ON is set to {FALSE}; nothing in the visible part of this file reads it.
45DEBUG_ON    SETL {FALSE}
46
47;// Declare input registers
;// pPred1/pDstPred both name r2 and iPredStep1/iDstStep both name r3: the second
;// operand and the destination are the same buffer with the same step.
48pPred0          RN 0      ;// first operand pointer
49iPredStep0      RN 1      ;// step added to pPred0 after each row load
50pPred1          RN 2      ;// second operand pointer (same register as pDstPred)
51iPredStep1      RN 3      ;// second operand step (same register as iDstStep)
52pDstPred        RN 2      ;// destination pointer (same register as pPred1)
53iDstStep        RN 3      ;// destination step (same register as iPredStep1)
54
55;// Declare other intermediate registers
;// Temp1/ResultB share r4 and Temp2/ResultA share r5, so a Temp value must be
;// consumed before the Result that shares its register is written.
56iPredA0         RN 10     ;// first operand, even row of the current pair
57iPredA1         RN 11     ;// first operand, odd row of the current pair
58iPredB0         RN 12     ;// second operand, even row of the current pair
59iPredB1         RN 14     ;// second operand, odd row; r14 is the ARM link register
60Temp1           RN 4      ;// extra word loaded from [pPred0, #4] (Align2/Align3)
61Temp2           RN 5      ;// extra word loaded from [pPred0, #4] (Align2/Align3)
62ResultA         RN 5      ;// averaged even row (shares r5 with Temp2)
63ResultB         RN 4      ;// averaged odd row (shares r4 with Temp1)
64r0x80808080     RN 7      ;// holds the constant 0x80808080 inside each function
65
66    IF ARM1136JS
67
68        ;// This function calculates the rounded byte-wise average of a 4x4 block
69        ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4
        ;//
        ;// Inputs (register aliases are declared with RN near the top of this file):
        ;//   pPred0 (r0), iPredStep0 (r1) - first operand row pointer and row step
        ;//   pPred1 (r2), iPredStep1 (r3) - second operand row pointer and row step
        ;// pDstPred/iDstStep are aliases of the same r2/r3, so every result row is
        ;// stored over the second-operand row it was computed from.
        ;// The four post-indexed loads/stores advance r0 by 4*iPredStep0 and r2 by
        ;// 4*iDstStep. r7, r10, r11 and r12 are overwritten.
        ;// NOTE(review): M_START, M_END, M_LDR and M_STR are macros that are not
        ;// defined in this file (presumably armCOMM_s.h). The comments below treat
        ;// M_LDR/M_STR as LDR/STR with the same operands, and assume that
        ;// "M_START ..., r6" saves r4-r6 and lr as the file header implies - confirm.
70
71        ;// Function header
72        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
73
74        ;// Code start
75        LDR         r0x80808080, =0x80808080      ;// 0x80 in every byte lane, used by the EOR steps
76
77        ;// 1st load: rows 0 and 1 of both operands
        ;// pPred0 is post-incremented by iPredStep0 after each A load; pPred1 is
        ;// left unchanged here and only moves when the results are stored.
78        M_LDR       iPredB0, [pPred1]
79        M_LDR       iPredA0, [pPred0], iPredStep0
80        M_LDR       iPredB1, [pPred1, iPredStep1]
81        M_LDR       iPredA1, [pPred0], iPredStep0
82
83        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
84        MVN         iPredB0, iPredB0              ;// per byte: 255 - b
85        MVN         iPredB1, iPredB1
86        UHSUB8      ResultA, iPredA0, iPredB0     ;// per byte: (a - (255-b)) >> 1
87        UHSUB8      ResultB, iPredA1, iPredB1
88        EOR         ResultA, ResultA, r0x80808080 ;// per byte: + 128 (mod 256) by flipping bit 7
        ;// Store row 0, then advance the shared r2 pointer to the next row
89        M_STR       ResultA, [pDstPred], iDstStep
90        EOR         ResultB, ResultB, r0x80808080
91        M_STR       ResultB, [pDstPred], iDstStep
92
93        ;// 2nd load: rows 2 and 3 (r2 has already advanced by two rows)
94        M_LDR       iPredA0, [pPred0], iPredStep0
95        M_LDR       iPredB0, [pPred1]
96        M_LDR       iPredA1, [pPred0], iPredStep0
97        M_LDR       iPredB1, [pPred1, iPredStep1]
98
        ;// Same computation as for rows 0 and 1, with the MVN/UHSUB8 pairs interleaved
99        MVN         iPredB0, iPredB0
100        UHSUB8      ResultA, iPredA0, iPredB0
101        MVN         iPredB1, iPredB1
102        UHSUB8      ResultB, iPredA1, iPredB1
103        EOR         ResultA, ResultA, r0x80808080
104        M_STR       ResultA, [pDstPred], iDstStep
105        EOR         ResultB, ResultB, r0x80808080
106        M_STR       ResultB, [pDstPred], iDstStep
        ;// NOTE(review): the End0 label is not referenced in the visible source
107End0
108        M_END
109
110        ;// This function calculates the rounded byte-wise average of a 4x4 block
111        ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
        ;//
        ;// Inputs (register aliases are declared with RN near the top of this file):
        ;//   pPred0 (r0), iPredStep0 (r1) - first operand row pointer and row step
        ;//   pPred1 (r2), iPredStep1 (r3) - second operand row pointer and row step
        ;// pDstPred/iDstStep are aliases of the same r2/r3, so every result row is
        ;// stored over the second-operand row it was computed from.
        ;// For each row of the first operand two words are loaded, [pPred0] and
        ;// [pPred0, #4], and the row is rebuilt from the upper halfword of the first
        ;// word and the lower halfword of the second.
        ;// NOTE(review): with little-endian word loads that corresponds to bytes 2..5
        ;// counted from pPred0, which suggests the caller passes the word-aligned
        ;// base address in pPred0 - confirm both assumptions against the caller.
        ;// NOTE(review): M_START, M_END, M_LDR and M_STR are macros that are not
        ;// defined in this file (presumably armCOMM_s.h). The comments below treat
        ;// M_LDR/M_STR as LDR/STR with the same operands - confirm. Plain LDR and
        ;// M_LDR are mixed in this function.
112
113        ;// Function header
114        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
115
116        ;// Code start
117        LDR         r0x80808080, =0x80808080      ;// 0x80 in every byte lane, used by the EOR steps
118
119        ;// 1st load: rows 0 and 1 of both operands
        ;// Temp1 is read before pPred0 is post-incremented, so it is the word that
        ;// follows iPredA0 in row 0; Temp2 plays the same role for row 1.
120        LDR         Temp1, [pPred0, #4]
121        M_LDR       iPredA0, [pPred0], iPredStep0
122        M_LDR       iPredB0, [pPred1]
123        M_LDR       iPredB1, [pPred1, iPredStep1]
124        M_LDR       Temp2, [pPred0, #4]
125        M_LDR       iPredA1, [pPred0], iPredStep0
126        MVN         iPredB0, iPredB0              ;// per byte: 255 - b
127        MVN         iPredB1, iPredB1
128        MOV         iPredA0, iPredA0, LSR #16     ;// upper halfword of first word -> bits 15:0
129        ORR         iPredA0, iPredA0, Temp1, LSL #16  ;// lower halfword of next word -> bits 31:16
130        MOV         iPredA1, iPredA1, LSR #16
131        ORR         iPredA1, iPredA1, Temp2, LSL #16
132
133        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        ;// ResultA/ResultB reuse r5/r4 (Temp2/Temp1); both Temp values have already
        ;// been consumed by the ORR instructions above.
134        UHSUB8      ResultA, iPredA0, iPredB0     ;// per byte: (a - (255-b)) >> 1
135        UHSUB8      ResultB, iPredA1, iPredB1
136        EOR         ResultA, ResultA, r0x80808080 ;// per byte: + 128 (mod 256) by flipping bit 7
        ;// Store row 0, then advance the shared r2 pointer to the next row
137        M_STR       ResultA, [pDstPred], iDstStep
138        EOR         ResultB, ResultB, r0x80808080
139        M_STR       ResultB, [pDstPred], iDstStep
140
141        ;// 2nd load: rows 2 and 3 (r2 has already advanced by two rows)
142        LDR         Temp1, [pPred0, #4]
143        M_LDR         iPredA0, [pPred0], iPredStep0
144        LDR         iPredB0, [pPred1]
145        LDR         iPredB1, [pPred1, iPredStep1]
146        LDR         Temp2, [pPred0, #4]
147        M_LDR         iPredA1, [pPred0], iPredStep0
148        MVN         iPredB0, iPredB0
149        MVN         iPredB1, iPredB1
150        MOV         iPredA0, iPredA0, LSR #16
151        ORR         iPredA0, iPredA0, Temp1, LSL #16
152        MOV         iPredA1, iPredA1, LSR #16
153        ORR         iPredA1, iPredA1, Temp2, LSL #16
154
        ;// Same averaging and store sequence as for rows 0 and 1
155        UHSUB8      ResultA, iPredA0, iPredB0
156        UHSUB8      ResultB, iPredA1, iPredB1
157        EOR         ResultA, ResultA, r0x80808080
158        M_STR       ResultA, [pDstPred], iDstStep
159        EOR         ResultB, ResultB, r0x80808080
160        M_STR       ResultB, [pDstPred], iDstStep
        ;// NOTE(review): the End2 label is not referenced in the visible source
161End2
162        M_END
163
164
165        ;// This function calculates the rounded byte-wise average of a 4x4 block
166        ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
        ;//
        ;// Inputs (register aliases are declared with RN near the top of this file):
        ;//   pPred0 (r0), iPredStep0 (r1) - first operand row pointer and row step
        ;//   pPred1 (r2), iPredStep1 (r3) - second operand row pointer and row step
        ;// pDstPred/iDstStep are aliases of the same r2/r3, so every result row is
        ;// stored over the second-operand row it was computed from.
        ;// For each row of the first operand two words are loaded, [pPred0] and
        ;// [pPred0, #4], and the row is rebuilt from the top byte of the first word
        ;// and the low three bytes of the second.
        ;// NOTE(review): with little-endian word loads that corresponds to bytes 3..6
        ;// counted from pPred0, which suggests the caller passes the word-aligned
        ;// base address in pPred0 - confirm both assumptions against the caller.
        ;// NOTE(review): M_START, M_END, M_LDR and M_STR are macros that are not
        ;// defined in this file (presumably armCOMM_s.h). The comments below treat
        ;// M_LDR/M_STR as LDR/STR with the same operands - confirm.
167
168        ;// Function header
169        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
170
171        ;// Code start
172        LDR         r0x80808080, =0x80808080      ;// 0x80 in every byte lane, used by the EOR steps
173
174        ;// 1st load: rows 0 and 1 of both operands
        ;// Temp1 is read before pPred0 is post-incremented, so it is the word that
        ;// follows iPredA0 in row 0; Temp2 plays the same role for row 1.
175        LDR         Temp1, [pPred0, #4]
176        M_LDR       iPredA0, [pPred0], iPredStep0
177        LDR         iPredB0, [pPred1]
178        LDR         iPredB1, [pPred1, iPredStep1]
179        LDR         Temp2, [pPred0, #4]
180        M_LDR       iPredA1, [pPred0], iPredStep0
181
182        MVN         iPredB0, iPredB0              ;// per byte: 255 - b
183        MVN         iPredB1, iPredB1
184        MOV         iPredA0, iPredA0, LSR #24     ;// top byte of first word -> bits 7:0
185        ORR         iPredA0, iPredA0, Temp1, LSL #8   ;// low three bytes of next word -> bits 31:8
186        MOV         iPredA1, iPredA1, LSR #24
187        ORR         iPredA1, iPredA1, Temp2, LSL #8
        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        ;// ResultA/ResultB reuse r5/r4 (Temp2/Temp1); both Temp values have already
        ;// been consumed by the ORR instructions above.
188        UHSUB8      ResultA, iPredA0, iPredB0     ;// per byte: (a - (255-b)) >> 1
189        UHSUB8      ResultB, iPredA1, iPredB1
190        EOR         ResultA, ResultA, r0x80808080 ;// per byte: + 128 (mod 256) by flipping bit 7
        ;// Store row 0, then advance the shared r2 pointer to the next row
191        M_STR       ResultA, [pDstPred], iDstStep
192        EOR         ResultB, ResultB, r0x80808080
193        M_STR       ResultB, [pDstPred], iDstStep
194
195        ;// 2nd load: rows 2 and 3 (r2 has already advanced by two rows)
196        LDR         Temp1, [pPred0, #4]
197        M_LDR       iPredA0, [pPred0], iPredStep0
198        LDR         iPredB0, [pPred1]
199        LDR         iPredB1, [pPred1, iPredStep1]
200        LDR         Temp2, [pPred0, #4]
201        M_LDR       iPredA1, [pPred0], iPredStep0
202
203        MVN         iPredB0, iPredB0
204        MVN         iPredB1, iPredB1
205        MOV         iPredA0, iPredA0, LSR #24
206        ORR         iPredA0, iPredA0, Temp1, LSL #8
207        MOV         iPredA1, iPredA1, LSR #24
208        ORR         iPredA1, iPredA1, Temp2, LSL #8
209
        ;// Same averaging and store sequence as for rows 0 and 1
210        UHSUB8      ResultA, iPredA0, iPredB0
211        UHSUB8      ResultB, iPredA1, iPredB1
212        EOR         ResultA, ResultA, r0x80808080
213        M_STR       ResultA, [pDstPred], iDstStep
214        EOR         ResultB, ResultB, r0x80808080
215        M_STR       ResultB, [pDstPred], iDstStep
        ;// NOTE(review): the End3 label is not referenced in the visible source
216End3
217        M_END
218
219    ENDIF
220
221    END
222