1///*****************************************************************************
2//*
3//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************/
18///**
19///*******************************************************************************
//* @file
//*  ihevc_deblk_chroma_vert.s
//*
//* @brief
//*  contains the function definition for the chroma deblocking filter applied
//*  across a vertical edge. the function is hand-written armv8 (aarch64) neon
//*  assembly in gnu assembler syntax.
28//*
29//* @author
30//*  anand s
31//*
//* @par list of functions:
//*  - ihevc_deblk_chroma_vert_av8()
34//*
35//* @remarks
36//*  none
37//*
38//*******************************************************************************/
39//void ihevc_deblk_chroma_vert(UWORD8 *pu1_src,
40//                             WORD32 src_strd,
41//                             WORD32 quant_param_p,
42//                             WORD32 quant_param_q,
43//                             WORD32 qp_offset_u,
44//                             WORD32 qp_offset_v,
45//                             WORD32 tc_offset_div2,
46//                             WORD32 filter_flag_p,
47//                             WORD32 filter_flag_q)
48
49.text
50.align 4
51.include "ihevc_neon_macros.s"
52
53
54
55.extern gai4_ihevc_qp_table
56.extern gai4_ihevc_tc_table
57.globl ihevc_deblk_chroma_vert_av8
58
59.type ihevc_deblk_chroma_vert_av8, %function
60
//------------------------------------------------------------------------------
// ihevc_deblk_chroma_vert_av8
//
// Filters the bytes on either side of a vertical chroma edge for four
// consecutive rows.  Each row is read as 8 bytes starting at pu1_src - 4.
// Results are written back one halfword per row at pu1_src - 2 (only when
// filter_flag_p != 0) and one halfword per row at pu1_src (only when
// filter_flag_q != 0).
//
// Arguments on entry (AAPCS64):
//   x0   = pu1_src
//   w1   = src_strd        byte distance between consecutive rows
//   w2   = quant_param_p
//   w3   = quant_param_q
//   w4   = qp_offset_u
//   w5   = qp_offset_v
//   w6   = tc_offset_div2
//   w7   = filter_flag_p
//   [sp] = filter_flag_q
//
// Register roles after the argument shuffle:
//   x7  = qp_offset_u (later: address of gai4_ihevc_qp_table)
//   x6  = qp_offset_v (later: scratch, then address of gai4_ihevc_tc_table)
//   x5  = tc_offset_div2 (later: scratch)
//   x12 = filter_flag_p,  x4 = filter_flag_q
//   x3  = qp index / tc index / tc for the first half of the lanes
//   x2  = qp index / tc index / tc for the second half of the lanes
//   x8  = row read pointer,  x20 = scratch (callee-saved, hence the stp/ldp)
//
// Per 16-bit lane the filter computes, with v17 = q0, v16 = p0, v5 = p1,
// v4 = q1 (names follow the usual deblocking convention):
//   delta = clip(-tc, tc, (((q0 - p0) << 2) + p1 - q1 + 4) >> 3)
//   p0'   = sat_u8(p0 + delta)
//   q0'   = sat_u8(q0 - delta)
//
// push_v_regs / pop_v_regs come from ihevc_neon_macros.s and are not visible
// here; presumably they save/restore the callee-saved NEON registers.
//------------------------------------------------------------------------------
ihevc_deblk_chroma_vert_av8:
    // AAPCS64 leaves bits [63:32] of a register that carries a 32-bit
    // argument unspecified.  Every use below is a 64-bit operation, so all
    // WORD32 arguments are sign-extended before anything else touches them.
    sxtw        x1,w1                       // src_strd (row post-increment)
    sxtw        x2,w2                       // quant_param_p
    sxtw        x3,w3                       // quant_param_q
    sxtw        x4,w4                       // qp_offset_u
    sxtw        x5,w5                       // qp_offset_v
    sxtw        x6,w6                       // tc_offset_div2
    mov         x15,x5                      // swap x5 <-> x6 through x15:
    mov         x5,x6                       //   x5 = tc_offset_div2
    mov         x6,x15                      //   x6 = qp_offset_v
    sxtw        x12,w7                      // x12 = filter_flag_p
    mov         x7, x4                      // x7  = qp_offset_u
    ldr         w4, [sp]                    // x4  = filter_flag_q; read before sp moves

    push_v_regs
    stp         x19, x20,[sp,#-16]!

    // Load 4 rows x 8 bytes starting 4 bytes left of pu1_src; the scalar
    // QP computation is interleaved with the loads and transposes.
    sub         x8,x0,#4
    add         x2,x2,x3                    // x2 = quant_param_p + quant_param_q
    ld1         {v5.8b},[x8],x1
    add         x2,x2,#1                    // ... + 1 (rounding for the >> 1 below)
    ld1         {v17.8b},[x8],x1
    ld1         {v16.8b},[x8],x1
    ld1         {v4.8b},[x8]

    // Transpose step 1: byte granularity.
    trn1        v29.8b, v5.8b, v17.8b
    trn2        v17.8b, v5.8b, v17.8b
    mov         v5.d[0], v29.d[0]
    adds        x3,x7,x2,asr #1             // x3 = qp_offset_u + ((qp_p + qp_q + 1) >> 1), sets N
    trn1        v29.8b, v16.8b, v4.8b
    trn2        v4.8b, v16.8b, v4.8b
    mov         v16.d[0], v29.d[0]
    adrp        x7, :got:gai4_ihevc_qp_table
    ldr         x7, [x7, #:got_lo12:gai4_ihevc_qp_table]


    // Map the first index: negative -> unchanged, 0..57 -> table lookup,
    // above 57 -> index - 6.
    bmi         l1.2944                     // flags still from the adds above
    cmp         x3,#0x39
    bgt         lbl78
    ldr         w3, [x7,x3,lsl #2]
    sxtw        x3,w3
lbl78:
    sub         x20,x3,#6
    csel        x3, x20, x3,gt              // gt only holds when the lookup was skipped
l1.2944:
    // Transpose step 2: halfword granularity.
    trn1        v29.4h, v5.4h, v16.4h
    trn2        v16.4h, v5.4h, v16.4h
    mov         v5.d[0], v29.d[0]
    adds        x2,x6,x2,asr #1             // x2 = qp_offset_v + ((qp_p + qp_q + 1) >> 1), sets N
    trn1        v29.4h, v17.4h, v4.4h
    trn2        v4.4h, v17.4h, v4.4h
    mov         v17.d[0], v29.d[0]
    // Same mapping for the second index.
    bmi         l1.2964
    cmp         x2,#0x39
    bgt         lbl86
    ldr         w2, [x7,x2,lsl #2]
    sxtw        x2,w2
lbl86:
    sub         x20,x2,#6
    csel        x2, x20, x2,gt
l1.2964:
    // Transpose step 3: word granularity.
    trn1        v29.2s, v5.2s, v17.2s
    trn2        v17.2s, v5.2s, v17.2s
    mov         v5.d[0], v29.d[0]
    add         x3,x3,x5,lsl #1             // + 2 * tc_offset_div2
    trn1        v29.2s, v16.2s, v4.2s
    trn2        v4.2s, v16.2s, v4.2s
    mov         v16.d[0], v29.d[0]

    // First tc index = clamp(x3 + 2, 0, 53).
    add         x6,x3,#2
    uxtl        v18.8h, v17.8b              // v18 = q0 widened to 16 bit
    cmp         x6,#0x35
    mov         x20,#0x35
    csel        x3, x20, x3,gt              // upper clamp
    bgt         l1.2996
    adds        x3,x3,#2
    csel        x3, xzr, x3,mi              // lower clamp
l1.2996:
    usubl       v0.8h, v17.8b, v16.8b       // q0 - p0
    adrp        x6, :got:gai4_ihevc_tc_table
    ldr         x6, [x6, #:got_lo12:gai4_ihevc_tc_table]
    shl         v0.8h, v0.8h,#2             // (q0 - p0) << 2
    add         x2,x2,x5,lsl #1             // + 2 * tc_offset_div2
    add         x5,x2,#2
    uaddw       v0.8h,  v0.8h ,  v5.8b      // + p1
    cmp         x5,#0x35
    ldr         w3, [x6,x3,lsl #2]          // x3 = first tc value (ldr leaves flags intact)
    sxtw        x3,w3
    usubw       v4.8h,  v0.8h ,  v4.8b      // - q1
    // Second tc index = clamp(x2 + 2, 0, 53).
    mov         x20,#0x35
    csel        x2, x20, x2,gt              // upper clamp
    bgt         l1.3036
    adds        x2,x2,#2
    csel        x2, xzr, x2,mi              // lower clamp
l1.3036:


    srshr       v6.8h, v4.8h,#3             // rounding shift: (... + 4) >> 3
    dup         v2.4h,w3                    // +tc, first value
    ldr         w2, [x6,x2,lsl #2]          // x2 = second tc value
    sxtw        x2,w2
    neg         x3, x3
    dup         v3.4h,w2                    // +tc, second value
    neg         x2, x2
    dup         v30.4h,w3                   // -tc, first value
    dup         v31.4h,w2                   // -tc, second value

    // Low four lanes use the first tc, high four lanes the second.
    mov         v30.d[1],v31.d[0]
    mov         v2.d[1],v3.d[0]

    smin        v4.8h,  v6.8h ,  v2.8h      // delta = min(delta, +tc)
    smax        v2.8h,  v30.8h ,  v4.8h     // delta = max(delta, -tc)

    uxtl        v6.8h, v16.8b               // p0 widened to 16 bit

    add         v0.8h,  v6.8h ,  v2.8h      // p0 + delta
    sub         v2.8h,  v18.8h ,  v2.8h     // q0 - delta
    sqxtun      v0.8b, v0.8h                // saturate back to unsigned 8 bit
    sub         x2,x0,#2                    // x2 = store pointer left of pu1_src
    sqxtun      v1.8b, v2.8h
    // Transpose back so that each halfword lane holds one row's output.
    trn1        v29.2s, v0.2s, v1.2s
    trn2        v1.2s, v0.2s, v1.2s
    mov         v0.d[0], v29.d[0]
    trn1        v29.8b, v0.8b, v1.8b
    trn2        v1.8b, v0.8b, v1.8b
    mov         v0.d[0], v29.d[0]
    cbz         x12, l1.3204                // filter_flag_p == 0: leave the left side untouched

    st1         {v0.h}[0],[x2],x1
    st1         {v1.h}[0],[x2],x1
    st1         {v0.h}[1],[x2],x1
    st1         {v1.h}[1],[x2]
l1.3204:
    cbz         x4, l1.3228                 // filter_flag_q == 0: leave the right side untouched
    st1         {v0.h}[2],[x0],x1
    st1         {v1.h}[2],[x0],x1
    st1         {v0.h}[3],[x0],x1
    st1         {v1.h}[3],[x0]
l1.3228:
    ldp         x19, x20,[sp],#16
    pop_v_regs
    ret
209
210
211
212