1;//
2;//
3;// File Name:  armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   12290
6;// Date:       Wednesday, April 9, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13        INCLUDE omxtypes_s.h
14        INCLUDE armCOMM_s.h
15
16        M_VARIANTS ARM1136JS
17
18        EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
19        EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
20
21;// Functions:
22;//     armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
23;//     armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
24;//
25;// Implements re-arrangement of data from the temporary buffer to a buffer pointed to by pBuf.
26;// This converts the data from 16 bit to 8 bit, and it also
27;// removes the offset and checks for saturation.
28;//
29;// Registers used as input for this function
30;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
31;//
32;// Registers preserved for top level function
33;// r4,r5,r6,r8,r9,r14
34;//
35;// Registers modified by the function
36;// r7,r10,r11,r12
37;//
38;// Output registers
39;// r0 - pointer to the destination location
40;// r1 - step size to this destination location
41
42
;// Assembly-time symbols and register aliases shared by both routines below.
;// NOTE(review): DEBUG_ON is only assigned here; it is presumably declared and
;// consumed by the included armCOMM_s.h macros - confirm there.
43DEBUG_ON    SETL {FALSE}
44
45MASK            EQU 0x80808080  ;// Mask is used to implement (a+b+1)/2; not referenced by the code visible in this file
46
47;// Declare input registers
48
49pSrc0           RN 0            ;// r0: source pointer on entry; overwritten with the result pointer on exit
50srcStep0        RN 1            ;// r1: source step on entry; overwritten with the result step on exit
51
52;// Declare other intermediate registers
;// Temp1..Temp4 receive the four words loaded from one source row.
;// ValueA0/ValueA1 are second names for r10/r11, i.e. they share registers
;// with Temp3/Temp4, so writing ValueA0 destroys Temp3 and writing ValueA1
;// destroys Temp4.
53Temp1           RN 4
54Temp2           RN 5
55Temp3           RN 10
56Temp4           RN 11
57pBuf            RN 7            ;// r7: destination pointer, advanced by the post-indexed stores
58r0x0fe00fe0     RN 6            ;// r6: holds the constant 0x0fe00fe0 (offset subtracted from each 16-bit lane)
59r0x00ff00ff     RN 12           ;// r12: holds the constant 0x00ff00ff (keeps the low byte of each 16-bit lane)
60Count           RN 14           ;// r14 (the link register) is reused as the loop counter
61ValueA0         RN 10           ;// same register as Temp3; even half of the r10/r11 pair stored by STRD
62ValueA1         RN 11           ;// same register as Temp4; odd half of the r10/r11 pair stored by STRD
63
64    IF ARM1136JS
65
66
67        ;// Function header
        ;//
        ;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        ;//
        ;// Runs four loop iterations. Each iteration loads four 32-bit words
        ;// (two 16-bit lanes per word) from pSrc0, reduces every lane to 8 bits
        ;// and stores the resulting 8 bytes to pBuf, so 32 bytes are written.
        ;//
        ;// Per-lane arithmetic:
        ;//   1. UQSUB16: subtract 0x0fe0 with unsigned saturation (floor at 0)
        ;//   2. USAT16 #13: saturate to the range 0 .. 0x1fff
        ;//   3. LSR #5 + AND 0x00ff00ff: keep bits [12:5], an 8-bit value
        ;//
        ;// In:     pSrc0 (r0), srcStep0 (r1), pBuf (r7)
        ;// Out:    pSrc0 (r0)    = value pBuf had on entry (start of the written data)
        ;//         srcStep0 (r1) = 8 (bytes written per iteration)
        ;// Writes: r4, r5, r6, r10, r11, r12, r14, r7 and the condition flags
        ;// NOTE(review): M_START/M_END are macros from the included headers; the
        ;// "r6" argument presumably makes them save and restore r4-r6 and the
        ;// return address - confirm in armCOMM_s.h.
68        M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6
69
70        ;// Code start
71        MOV         Count, #4                           ;// four iterations, one source row each
72        LDR         r0x0fe00fe0, =0x0fe00fe0            ;// per-lane offset to remove
73        LDR         r0x00ff00ff, =0x00ff00ff            ;// per-lane low-byte mask
74LoopStart1
        ;// Load words at offsets 12, 8 and 4 first; the last load is written in
        ;// post-indexed form, so it must come after the fixed-offset loads.
        ;// NOTE(review): M_LDR is a macro from the included headers; it
        ;// presumably loads from [pSrc0] and then adds srcStep0 to pSrc0 - confirm.
75        LDR         Temp4, [pSrc0, #12]
76        LDR         Temp3, [pSrc0, #8]
77        LDR         Temp2, [pSrc0, #4]
78        M_LDR       Temp1, [pSrc0], srcStep0
        ;// Remove the 0x0fe0 offset from both lanes of each word (floors at 0).
79        UQSUB16     Temp4, Temp4, r0x0fe00fe0
80        UQSUB16     Temp3, Temp3, r0x0fe00fe0
81        UQSUB16     Temp2, Temp2, r0x0fe00fe0
82        UQSUB16     Temp1, Temp1, r0x0fe00fe0
        ;// Clamp each lane to 13 bits so that the value after >>5 fits in a byte.
83        USAT16      Temp4, #13, Temp4
84        USAT16      Temp3, #13, Temp3
85        USAT16      Temp2, #13, Temp2
86        USAT16      Temp1, #13, Temp1
        ;// Shift the whole word right by 5 and mask: each lane becomes 0x00vv.
        ;// The mask also discards the upper-lane bits shifted into the lower lane.
87        AND         Temp4, r0x00ff00ff, Temp4, LSR #5
88        AND         Temp3, r0x00ff00ff, Temp3, LSR #5
89        AND         Temp2, r0x00ff00ff, Temp2, LSR #5
90        AND         Temp1, r0x00ff00ff, Temp1, LSR #5
        ;// Interleave byte pairs: bytes 0/2 come from the first operand, bytes
        ;// 1/3 from the second. ValueA1 must be formed before ValueA0 because
        ;// ValueA0 shares r10 with Temp3, which the ValueA1 computation reads.
91        ORR         ValueA1, Temp3, Temp4, LSL #8
92        ORR         ValueA0, Temp1, Temp2, LSL #8
93        SUBS        Count, Count, #1                    ;// sets the flags tested by BGT below
94        STRD        ValueA0, [pBuf], #8                 ;// store r10,r11; pBuf += 8 (flags untouched)
95        BGT         LoopStart1
96End1
        ;// pBuf has advanced 4 * 8 = 32 bytes; return the start of that area
        ;// and the 8-byte distance between consecutive stores.
97        SUB        pSrc0, pBuf, #32
98        MOV        srcStep0, #8
99
100        M_END
101
102
103        ;// Function header
        ;//
        ;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        ;//
        ;// Runs two loop iterations; each iteration processes two source rows.
        ;// For every row, four 32-bit words (two 16-bit lanes per word) are
        ;// loaded from pSrc0, every lane is reduced to 8 bits exactly as in the
        ;// HorDiagCopy routine (UQSUB16 0x0fe0, USAT16 #13, >>5, mask), and the
        ;// eight resulting bytes are regrouped into two words: one built from
        ;// the low halfwords of ValueA0/ValueA1 and one from their high halfwords.
        ;//
        ;// Store layout per iteration (byte offsets from pBuf at iteration start):
        ;//   +0  first row,  low-halfword word
        ;//   +4  second row, low-halfword word
        ;//   +8  first row,  high-halfword word
        ;//   +12 second row, high-halfword word
        ;// pBuf ends the iteration 16 bytes further on; 32 bytes are written in all.
        ;//
        ;// In:     pSrc0 (r0), srcStep0 (r1), pBuf (r7)
        ;// Out:    pSrc0 (r0)    = entry value of pBuf + 8
        ;//         srcStep0 (r1) = 4
        ;// Writes: r4, r5, r6, r10, r11, r12, r14, r7 and the condition flags
        ;// NOTE(review): M_START/M_END are macros from the included headers; the
        ;// "r6" argument presumably makes them save and restore r4-r6 and the
        ;// return address - confirm in armCOMM_s.h.
104        M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6
105
106        ;// Code start
107        LDR         r0x0fe00fe0, =0x0fe00fe0            ;// per-lane offset to remove
108        LDR         r0x00ff00ff, =0x00ff00ff            ;// per-lane low-byte mask
109        MOV         Count, #2                           ;// two iterations, two source rows each
110
111LoopStart
        ;// ---- first row of this iteration ----
        ;// Fixed-offset loads first; the last load is written in post-indexed form.
        ;// NOTE(review): M_LDR is a macro from the included headers; it
        ;// presumably loads from [pSrc0] and then adds srcStep0 to pSrc0 - confirm.
112        LDR         Temp4, [pSrc0, #12]
113        LDR         Temp3, [pSrc0, #8]
114        LDR         Temp2, [pSrc0, #4]
115        M_LDR       Temp1, [pSrc0], srcStep0
116
        ;// Remove the 0x0fe0 offset from both lanes of each word (floors at 0).
117        UQSUB16     Temp4, Temp4, r0x0fe00fe0
118        UQSUB16     Temp3, Temp3, r0x0fe00fe0
119        UQSUB16     Temp2, Temp2, r0x0fe00fe0
120        UQSUB16     Temp1, Temp1, r0x0fe00fe0
121
        ;// Clamp each lane to 13 bits so that the value after >>5 fits in a byte.
122        USAT16      Temp4, #13, Temp4
123        USAT16      Temp3, #13, Temp3
124        USAT16      Temp2, #13, Temp2
125        USAT16      Temp1, #13, Temp1
126
        ;// Shift right by 5 and mask: each lane becomes 0x00vv.
127        AND         Temp4, r0x00ff00ff, Temp4, LSR #5
128        AND         Temp3, r0x00ff00ff, Temp3, LSR #5
129        AND         Temp2, r0x00ff00ff, Temp2, LSR #5
130        AND         Temp1, r0x00ff00ff, Temp1, LSR #5
        ;// ValueA1 must be formed before ValueA0: ValueA0 shares r10 with Temp3,
        ;// which the ValueA1 computation still reads.
131        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]
132        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]
133
        ;// Low halfword of ValueA0 in the bottom, low halfword of ValueA1 in the top.
134        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
135
136        STR         Temp1, [pBuf], #8                    ;// write at +0, pBuf -> +8
        ;// High halfword of ValueA0 in the bottom, high halfword of ValueA1 in the top.
137        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
138        STR         Temp2, [pBuf], #-4                   ;// write at +8, pBuf steps back to +4
139
        ;// ---- second row of this iteration (same arithmetic) ----
140        LDR         Temp4, [pSrc0, #12]
141        LDR         Temp3, [pSrc0, #8]
142        LDR         Temp2, [pSrc0, #4]
143        M_LDR       Temp1, [pSrc0], srcStep0
144
145        UQSUB16     Temp4, Temp4, r0x0fe00fe0
146        UQSUB16     Temp3, Temp3, r0x0fe00fe0
147        UQSUB16     Temp2, Temp2, r0x0fe00fe0
148        UQSUB16     Temp1, Temp1, r0x0fe00fe0
149
150        USAT16      Temp4, #13, Temp4
151        USAT16      Temp3, #13, Temp3
152        USAT16      Temp2, #13, Temp2
153        USAT16      Temp1, #13, Temp1
154
155        AND         Temp4, r0x00ff00ff, Temp4, LSR #5
156        AND         Temp3, r0x00ff00ff, Temp3, LSR #5
157        AND         Temp2, r0x00ff00ff, Temp2, LSR #5
158        AND         Temp1, r0x00ff00ff, Temp1, LSR #5
159        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]
160        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]
161
162        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
163        SUBS        Count, Count, #1                     ;// sets the flags tested by BGT; the stores and PKHTB below leave them intact
164        STR         Temp1, [pBuf], #8                    ;// write at +4 (the gap left above), pBuf -> +12
165        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
166        STR         Temp2, [pBuf], #4                    ;// write at +12, pBuf -> +16 for the next iteration
167
168        BGT         LoopStart
169End2
        ;// pBuf has advanced 2 * 16 = 32 bytes. The returned pointer is
        ;// pBuf - 24, i.e. 8 bytes past the start of the written area, with a
        ;// step of 4 bytes.
        ;// NOTE(review): the reason for starting 8 bytes in is not visible in
        ;// this file - confirm against the caller.
170        SUB         pSrc0, pBuf, #32-8
171        MOV         srcStep0, #4
172
173        M_END
174
175    ENDIF
176
177    END
178