1///*****************************************************************************
2//*
3//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************/
18///**
19//*******************************************************************************
20//* @file
21//*  ihevc_intra_pred_luma_mode_18_34_neon.s
22//*
23//* @brief
//*  contains function definitions for chroma intra prediction modes 18 and 34.
//* functions are coded in aarch64 neon assembly (gnu assembler syntax)
28//*
29//* @author
30//*  yogeswaran rs
31//*
32//* @par list of functions:
33//*
34//*
35//* @remarks
36//*  none
37//*
38//*******************************************************************************
39//*/
40///**
41//*******************************************************************************
42//*
43//* @brief
//*    chroma intra prediction for angular modes 18 and 34
45//*
46//* @par description:
47//*
48//* @param[in] pu1_ref
49//*  uword8 pointer to the source
50//*
51//* @param[out] pu1_dst
52//*  uword8 pointer to the destination
53//*
54//* @param[in] src_strd
55//*  integer source stride
56//*
57//* @param[in] dst_strd
58//*  integer destination stride
59//*
60//* @param[in] pi1_coeff
61//*  word8 pointer to the planar coefficients
62//*
63//* @param[in] nt
//*  size of transform block
65//*
66//* @param[in] mode
67//*  type of filtering
68//*
69//* @returns
70//*
71//* @remarks
72//*  none
73//*
74//*******************************************************************************
75//*/
76
77//void ihevc_intra_pred_chroma_mode_18_34(uword8 *pu1_ref,
78//                                      word32 src_strd,
79//                                      uword8 *pu1_dst,
80//                                      word32 dst_strd,
81//                                      word32 nt,
82//                                      word32 mode)
83//
84//**************variables vs registers*****************************************
85//x0 => *pu1_ref
86//x1 => src_strd
87//x2 => *pu1_dst
88//x3 => dst_strd
89
//x4 => nt
//x5 => mode
94
95.text
96.align 4
97.include "ihevc_neon_macros.s"
98
99
100
101.globl ihevc_intra_pred_chroma_mode_18_34_av8
102
103.type ihevc_intra_pred_chroma_mode_18_34_av8, %function
104
//-----------------------------------------------------------------------------
// void ihevc_intra_pred_chroma_mode_18_34_av8(uword8 *pu1_ref,   // x0
//                                             word32  src_strd,  // w1 (unused)
//                                             uword8 *pu1_dst,   // x2
//                                             word32  dst_strd,  // w3
//                                             word32  nt,        // w4
//                                             word32  mode)      // w5
//
// Copies nt rows of 2*nt bytes from the reference array to the destination
// (2 bytes per sample position; presumably interleaved Cb/Cr pairs -
// TODO confirm against the caller).
//   mode == 0x22 (34): row r is read from pu1_ref + 4*nt + 4 + 2*r
//   any other mode   : row r is read from pu1_ref + 4*nt     - 2*r
//                      (mode 18, going by the function name)
//
// nt is expected to be 4, 8 or 16: nt == 4 takes the mode2_4 path, the kernel
// loop handles 8 rows per iteration and a second 16-byte column strip is only
// processed when nt == 16.
//
// ABI:      AAPCS64, leaf function, no stack usage.
// Clobbers: x0, x2, x6, x8-x12, v0-v7, v16-v23, flags (all caller-saved).
//-----------------------------------------------------------------------------
ihevc_intra_pred_chroma_mode_18_34_av8:

    // dst_strd, nt and mode are 32-bit arguments. AAPCS64 leaves the upper
    // 32 bits of their registers unspecified, so extend them here before any
    // 64-bit use (address arithmetic, post-index stride) and compare mode/nt
    // through their w views.
    sxtw        x3,w3                   // x3 = (int64) dst_strd
    sxtw        x4,w4                   // x4 = (int64) nt

    cmp         w4,#4
    beq         mode2_4

    mov         x12,x4                  // x12 = rows left in the current strip
    mov         x11,x4                  // x11 = nt, used to detect the nt == 16 case
    add         x0,x0,x4,lsl #2         // x0  = pu1_ref + 4*nt
    mov         x10,x2                  // x10 = destination row pointer

    cmp         w5,#0x22                // mode 34 ?
    add         x9,x0,#4                // candidate start for mode 34
    csel        x0,x9,x0,eq             // x0 = first source row
    mov         x6,#2
    mov         x9,#-2
    csel        x6,x6,x9,eq             // x6 = source step per row: +2 (mode 34) / -2
    mov         x8,x0                   // x8 = source row pointer


kernel:

    // 8 rows per iteration, 16 bytes per row; the source slides by x6 bytes
    // and the destination advances by dst_strd after every row.
    ld1         {v0.8b, v1.8b},[x8],x6
    st1         {v0.8b, v1.8b},[x10],x3
    ld1         {v2.8b, v3.8b},[x8],x6
    st1         {v2.8b, v3.8b},[x10],x3
    ld1         {v4.8b, v5.8b},[x8],x6
    st1         {v4.8b, v5.8b},[x10],x3
    ld1         {v6.8b, v7.8b},[x8],x6
    st1         {v6.8b, v7.8b},[x10],x3
    ld1         {v16.8b, v17.8b},[x8],x6
    st1         {v16.8b, v17.8b},[x10],x3
    ld1         {v18.8b, v19.8b},[x8],x6
    st1         {v18.8b, v19.8b},[x10],x3
    ld1         {v20.8b, v21.8b},[x8],x6
    st1         {v20.8b, v21.8b},[x10],x3
    ld1         {v22.8b, v23.8b},[x8],x6
    st1         {v22.8b, v23.8b},[x10],x3

    subs        x12,x12,#8
    bne         kernel

    // First strip done. When nt == 16 each row is 32 bytes wide, so run the
    // kernel once more on the second 16-byte strip. The add/sub/mov below do
    // not modify the flags, so beq still tests the cmp result.
    cmp         x11,#16
    add         x8,x0,#16               // source: second 16-byte strip
    add         x10,x2,#16              // destination: second 16-byte strip
    sub         x11,x11,#16             // becomes 0 after the second strip -> no third pass
    mov         x12,#16                 // 16 rows in the second strip
    beq         kernel
    b           end_func

mode2_4:

    // nt == 4: four rows of 8 bytes each.
    cmp         w5,#0x22                // mode 34 ?
    add         x0,x0,#16               // x0 = pu1_ref + 4*nt
    add         x9,x0,#4                // candidate start for mode 34
    csel        x0,x9,x0,eq             // x0 = first source row

    mov         x8,#2
    mov         x9,#-2
    csel        x8,x8,x9,eq             // x8 = source step per row: +2 (mode 34) / -2

    ld1         {v0.8b},[x0],x8
    st1         {v0.2s},[x2],x3

    ld1         {v0.8b},[x0],x8
    st1         {v0.2s},[x2],x3

    ld1         {v0.8b},[x0],x8
    st1         {v0.2s},[x2],x3

    ld1         {v0.8b},[x0],x8
    st1         {v0.2s},[x2],x3

end_func:
    ret
193
194
195
196
197
198
199