1///*****************************************************************************
2//*
3//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************/
///*******************************************************************************
//* @file
//*  ihevc_deblk_chroma_horz.s
//*
//* @brief
//*  contains the function definition for deblocking a horizontal chroma edge.
//*  the function is coded in aarch64 neon assembly.
//*
//* @author
//*  anand s
//*
//* @par list of functions:
//*  - ihevc_deblk_chroma_horz_av8()
//*
//* @remarks
//*  none
//*
37//void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
38//                             WORD32 src_strd,
39//                             WORD32 quant_param_p,
40//                             WORD32 quant_param_q,
41//                             WORD32 qp_offset_u,
42//                             WORD32 qp_offset_v,
43//                             WORD32 tc_offset_div2,
44//                             WORD32 filter_flag_p,
45//                             WORD32 filter_flag_q)
46//
47
// Code section; the entry point below is aligned to 16 bytes (2^4).
.text
.p2align 4

// push_v_regs / pop_v_regs are not defined in this file; presumably they are
// macros supplied by this include -- confirm against ihevc_neon_macros.s.
.include "ihevc_neon_macros.s"

// Lookup tables defined elsewhere; both are read as arrays of 32-bit words.
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table

.globl ihevc_deblk_chroma_horz_av8
.type ihevc_deblk_chroma_horz_av8, %function
59
//-----------------------------------------------------------------------------
// ihevc_deblk_chroma_horz_av8
//
// Filters the two 8-byte rows on either side of a horizontal edge.
//
// ABI:  AAPCS64. Register/parameter mapping below follows the C prototype
//       (nine arguments: eight in x0-x7, the ninth on the stack).
// In:   x0   = pu1_src          (address of the first row below the edge)
//       w1   = src_strd         (signed byte stride between rows)
//       w2   = quant_param_p
//       w3   = quant_param_q
//       w4   = qp_offset_u
//       w5   = qp_offset_v
//       w6   = tc_offset_div2
//       w7   = filter_flag_p    (non-zero: write back the row at src - strd)
//       [sp] = filter_flag_q    (non-zero: write back the row at src)
// Out:  none (rows are updated in memory)
// Uses: x1-x10, x12, x20, v0, v2, v4, v6, v16, v18, v28-v31, flags
//
// Even byte lanes are clipped with the tc value derived from qp_offset_u,
// odd byte lanes with the tc value derived from qp_offset_v.
//-----------------------------------------------------------------------------
ihevc_deblk_chroma_horz_av8:
    // AAPCS64 leaves bits [63:32] of a register that carries a 32-bit
    // argument unspecified, so every WORD32 that takes part in 64-bit
    // arithmetic is sign-extended first.
    sxtw        x1, w1                      // src_strd
    sxtw        x2, w2                      // quant_param_p
    sxtw        x3, w3                      // quant_param_q
    sxtw        x4, w4                      // qp_offset_u
    sxtw        x5, w5                      // qp_offset_v
    sxtw        x6, w6                      // tc_offset_div2
    ldr         w9, [sp]                    // filter_flag_q; must be read before sp moves

    push_v_regs
    stp         x19, x20, [sp, #-16]!

    mov         x10, x4                     // x10 = qp_offset_u
    mov         w8, w7                      // w8  = filter_flag_p (only tested as 32-bit)
    mov         x7, x5                      // x7  = qp_offset_v
    mov         x4, x6                      // x4  = tc_offset_div2

    // ---- load the four rows around the edge and widen them to 16 bit -------
    sub         x12, x0, x1                 // x12 = src - strd
    sub         x5, x12, x1                 // x5  = src - 2*strd
    add         x6, x0, x1                  // x6  = src + strd
    ld1         {v0.8b}, [x0]               // row at src
    ld1         {v2.8b}, [x12]              // row at src - strd
    ld1         {v4.8b}, [x5]               // row at src - 2*strd
    ld1         {v16.8b}, [x6]              // row at src + strd
    uxtl        v0.8h, v0.8b
    uxtl        v2.8h, v2.8b
    uxtl        v4.8h, v4.8b
    uxtl        v16.8h, v16.8b

    // ---- chroma QP for both offsets ---------------------------------------
    //   idx = qp_offset + ((quant_param_p + quant_param_q + 1) >> 1)
    //   qp  = idx            if idx < 0
    //       = idx - 6        if idx > 57
    //       = qp_table[idx]  otherwise
    add         x1, x2, x3
    add         x2, x1, #1                  // x2 = quant_param_p + quant_param_q + 1
    adrp        x3, :got:gai4_ihevc_qp_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_qp_table]

    adds        x1, x10, x2, asr #1         // idx for qp_offset_u
    b.mi        .Lqp_u_done
    cmp         x1, #57
    b.gt        .Lqp_u_high
    ldr         w1, [x3, x1, lsl #2]
    b           .Lqp_u_done
.Lqp_u_high:
    sub         x1, x1, #6
.Lqp_u_done:

    adds        x2, x7, x2, asr #1          // idx for qp_offset_v
    b.mi        .Lqp_v_done
    cmp         x2, #57
    b.gt        .Lqp_v_high
    ldr         w2, [x3, x2, lsl #2]
    b           .Lqp_v_done
.Lqp_v_high:
    sub         x2, x2, #6
.Lqp_v_done:

    // ---- tc table index: clip(qp + 2*tc_offset_div2 + 2, 0, 53) -----------
    mov         x20, #53
    add         x1, x1, x4, lsl #1
    add         x1, x1, #2
    cmp         x1, x20
    csel        x1, x20, x1, gt             // upper bound
    cmp         x1, #0
    csel        x1, xzr, x1, lt             // lower bound

    add         x2, x2, x4, lsl #1
    add         x2, x2, #2
    cmp         x2, x20
    csel        x2, x20, x2, gt
    cmp         x2, #0
    csel        x2, xzr, x2, lt

    adrp        x3, :got:gai4_ihevc_tc_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_tc_table]
    ldr         w1, [x3, x1, lsl #2]        // tc for the qp_offset_u path
    ldr         w2, [x3, x2, lsl #2]        // tc for the qp_offset_v path

    // ---- delta = (((r0 - rm1) << 2) + rm2 - rp1 + 4) >> 3 -----------------
    //   r0 = row at src, rm1 = src - strd, rm2 = src - 2*strd, rp1 = src + strd
    sub         v6.8h, v0.8h, v2.8h
    shl         v6.8h, v6.8h, #2
    add         v4.8h, v6.8h, v4.8h
    sub         v6.8h, v4.8h, v16.8h
    srshr       v6.8h, v6.8h, #3            // rounding shift supplies the +4

    // ---- per-lane limits: +tc / -tc interleaved as u,v,u,v,... ------------
    dup         v30.8h, w1
    dup         v31.8h, w2
    neg         w1, w1
    neg         w2, w2
    dup         v28.8h, w1
    dup         v29.8h, w2
    zip1        v4.8h, v30.8h, v31.8h       // upper limits
    zip1        v18.8h, v28.8h, v29.8h      // lower limits

    smin        v16.8h, v6.8h, v4.8h
    smax        v4.8h, v18.8h, v16.8h       // v4 = clip(delta, -tc, tc)

    // ---- apply and narrow back to unsigned 8 bit with saturation ----------
    add         v2.8h, v2.8h, v4.8h         // row at src - strd gets +delta
    sub         v0.8h, v0.8h, v4.8h         // row at src gets -delta
    sqxtun      v2.8b, v2.8h
    sqxtun      v0.8b, v0.8h

    // ---- conditional write-back -------------------------------------------
    cbz         w8, .Lskip_store_p          // filter_flag_p == 0
    st1         {v2.8b}, [x12]
.Lskip_store_p:
    cbz         w9, .Lskip_store_q          // filter_flag_q == 0
    st1         {v0.8b}, [x0]
.Lskip_store_q:

    ldp         x19, x20, [sp], #16
    pop_v_regs
    ret
170
171
172
173
174