1@/*****************************************************************************
2@*
3@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4@*
5@* Licensed under the Apache License, Version 2.0 (the "License");
6@* you may not use this file except in compliance with the License.
7@* You may obtain a copy of the License at:
8@*
9@* http://www.apache.org/licenses/LICENSE-2.0
10@*
11@* Unless required by applicable law or agreed to in writing, software
12@* distributed under the License is distributed on an "AS IS" BASIS,
13@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@* See the License for the specific language governing permissions and
15@* limitations under the License.
16@*
17@*****************************************************************************/
18@/**
19@*******************************************************************************
20@* @file
21@*  ihevc_intra_pred_chroma_ver_neon.s
22@*
23@* @brief
24@*  contains the function definition for chroma vertical intra prediction.
25@* the function is coded in arm neon assembly and is written in the syntax of
26
27@* the gnu assembler
28@*
29@* @author
30@*  yogeswaran rs
31@*
32@* @par list of functions:
33@*
34@*
35@* @remarks
36@*  none
37@*
38@*******************************************************************************
39@*/
40@/**
41@*******************************************************************************
42@*
43@* @brief
44@*    chroma intra prediction for the vertical mode
45@*
46@* @par description:
47@*
48@* @param[in] pu1_ref
49@*  uword8 pointer to the source
50@*
51@* @param[out] pu1_dst
52@*  uword8 pointer to the destination
53@*
54@* @param[in] src_strd
55@*  integer source stride
56@*
57@* @param[in] dst_strd
58@*  integer destination stride
59@*
60@* @param[in] nt
61@*  size of transform block
62@*
63@* @param[in] mode
64@*  type of filtering
65@*
66@* @returns
67@*
68@* @remarks
69@*  none
70@*
71@*******************************************************************************
72@*/
73
@/**
@*******************************************************************************
@* void ihevc_intra_pred_chroma_ver(uword8 *pu1_ref,
@*                                  word32 src_strd,
@*                                  uword8 *pu1_dst,
@*                                  word32 dst_strd,
@*                                  word32 nt,
@*                                  word32 mode)
@*
@* exported symbol : ihevc_intra_pred_chroma_ver_a9q
@*
@* copies the run of reference bytes that starts at pu1_ref[4*nt + 2] into
@* successive destination rows that are dst_strd bytes apart
@* (presumably the run is the top neighbour row - confirm against the caller):
@*      nt <  8 :  4 rows of  8 bytes   (blk_4)
@*      nt == 8 :  8 rows of 16 bytes   (blk_8)
@*      nt >  8 : rows of 32 bytes, written four rows at a time until nt rows
@*                are done (copy_16; nt is expected to be 16)
@*
@**************variables vs registers*******************************************
@* r0  => *pu1_ref
@* r1  => src_strd              (not read)
@* r2  => *pu1_dst              (walks destination row 0 of each 4-row group)
@* r3  => dst_strd
@* r4  => nt                    (rows remaining in copy_16)
@* r5  => 4*nt, then 4*nt + 2, then pointer to destination row 1
@* r6  => source pointer, &pu1_ref[4*nt + 2]
@* r8  => pointer to destination row 2
@* r10 => pointer to destination row 3
@* r11 => pointer step between 4-row groups
@* d20-d23, d0 => copied data
@*
@* stack contents after the 40 byte register save:
@*   [sp, #40]  nt
@*   [sp, #44]  mode            (not read)
@*******************************************************************************
@*/

.text
.align 4

.globl ihevc_intra_pred_chroma_ver_a9q

.type ihevc_intra_pred_chroma_ver_a9q, %function

ihevc_intra_pred_chroma_ver_a9q:

    stmfd       sp!, {r4-r12, r14}          @save 10 registers (40 bytes); lr is reloaded into pc on exit

    ldr         r4, [sp, #40]               @r4 = nt (first stack argument, above the saved registers)
    lsl         r5, r4, #2                  @r5 = 4*nt

    cmp         r4, #8
    beq         blk_8                       @nt == 8
    blt         blk_4                       @nt <  8
                                            @nt >  8 falls through

@ 32 byte rows. vld2/vst2 with the same register pair de-interleave and then
@ re-interleave, so the bytes reach memory in their original order.
copy_16:
    add         r5, r5, #2                  @r5  = 4*nt + 2
    add         r6, r0, r5                  @r6  = &pu1_ref[4*nt + 2]

    add         r5, r2, r3                  @r5  = pu1_dst + 1*dst_strd
    vld2.8      {d20,d21}, [r6]!            @source bytes 0..15, r6 += 16
    add         r8, r5, r3                  @r8  = pu1_dst + 2*dst_strd
    add         r10, r8, r3                 @r10 = pu1_dst + 3*dst_strd
    vld2.8      {d22,d23}, [r6]             @source bytes 16..31

    lsl         r11, r3, #2                 @r11 = 4*dst_strd
    sub         r11, r11, #16               @first-half store already advanced each pointer by 16

kernel_copy_16:
    vst2.8      {d20,d21}, [r2]!            @bytes 0..15 of four consecutive rows
    vst2.8      {d20,d21}, [r5]!
    vst2.8      {d20,d21}, [r8]!
    vst2.8      {d20,d21}, [r10]!

    vst2.8      {d22,d23}, [r2], r11        @bytes 16..31, then step to the next 4-row group
    vst2.8      {d22,d23}, [r5], r11
    vst2.8      {d22,d23}, [r8], r11
    vst2.8      {d22,d23}, [r10], r11

    subs        r4, r4, #4                  @four rows done
    bgt         kernel_copy_16              @bgt (not bne) so the loop ends even if nt is not a multiple of 4

    b           end_func

@ 16 byte rows, 8 rows.
blk_8:
    add         r5, r5, #2                  @r5  = 4*nt + 2
    add         r6, r0, r5                  @r6  = &pu1_ref[4*nt + 2]

    add         r5, r2, r3                  @r5  = pu1_dst + 1*dst_strd
    vld2.8      {d20,d21}, [r6]             @the 16 source bytes of one row
    add         r8, r5, r3                  @r8  = pu1_dst + 2*dst_strd
    add         r10, r8, r3                 @r10 = pu1_dst + 3*dst_strd

    lsl         r11, r3, #2                 @r11 = 4*dst_strd

    vst2.8      {d20,d21}, [r2], r11        @rows 0..3, each pointer then moves down four rows
    vst2.8      {d20,d21}, [r5], r11
    vst2.8      {d20,d21}, [r8], r11
    vst2.8      {d20,d21}, [r10], r11

    vst2.8      {d20,d21}, [r2]             @rows 4..7
    vst2.8      {d20,d21}, [r5]
    vst2.8      {d20,d21}, [r8]
    vst2.8      {d20,d21}, [r10]

    b           end_func

@ 8 byte rows, 4 rows.
blk_4:
    add         r5, r5, #2                  @r5  = 4*nt + 2
    add         r6, r0, r5                  @r6  = &pu1_ref[4*nt + 2]

    vld1.8      {d0}, [r6]                  @the 8 source bytes of one row
    add         r5, r2, r3                  @r5  = pu1_dst + 1*dst_strd

    vst1.8      {d0}, [r2]                  @row 0
    add         r8, r5, r3                  @r8  = pu1_dst + 2*dst_strd
    vst1.8      {d0}, [r5]                  @row 1
    add         r10, r8, r3                 @r10 = pu1_dst + 3*dst_strd
    vst1.8      {d0}, [r8]                  @row 2
    vst1.8      {d0}, [r10]                 @row 3

end_func:
    ldmfd       sp!, {r4-r12, r15}          @restore registers and return (saved lr -> pc)
227
228
229
230