1@/*****************************************************************************
2@*
3@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4@*
5@* Licensed under the Apache License, Version 2.0 (the "License");
6@* you may not use this file except in compliance with the License.
7@* You may obtain a copy of the License at:
8@*
9@* http://www.apache.org/licenses/LICENSE-2.0
10@*
11@* Unless required by applicable law or agreed to in writing, software
12@* distributed under the License is distributed on an "AS IS" BASIS,
13@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@* See the License for the specific language governing permissions and
15@* limitations under the License.
16@*
17@*****************************************************************************/
18@/**
19@/*******************************************************************************
20@* @file
21@*  ihevc_deblk_chroma_vert.s
22@*
23@* @brief
24@*  contains function definitions for deblocking of vertical chroma edges.
25@* functions are coded in neon  assembly and can be compiled using
26
27@* rvct
28@*
29@* @author
30@*  anand s
31@*
32@* @par list of functions:
33@*  - ihevc_deblk_chroma_vert_a9q()
34@*
35@* @remarks
36@*  none
37@*
38@*******************************************************************************/
39
@ Byte offsets from sp to the stack-resident inputs of
@ ihevc_deblk_chroma_vert_a9q. They are only valid after that function's
@ `push {r4-r12,lr}` (10 registers = 40 bytes), so offset 40 addresses the
@ first word that was on the stack at entry.
@ NOTE(review): presumably these are the 5th..9th C arguments under the ARM
@ procedure call standard - confirm against the C prototype.
40.equ    qp_offset_u_offset,     40    @ QP offset used for the first (u) index
41.equ    qp_offset_v_offset,     44    @ QP offset used for the second (v) index
42.equ    tc_offset_div2_offset,  48    @ value doubled and added to both tc indices
43.equ    filter_p_offset,        52    @ non-zero => bytes at [r0-2],[r0-1] are written
44.equ    filter_q_offset,        56    @ non-zero => bytes at [r0],[r0+1] are written
45
@ Section setup, external symbols and the PC-relative literal words used by
@ ihevc_deblk_chroma_vert_a9q to locate the two lookup tables.
46.text
47.align 4
48
49
50
51
52
@ Lookup tables defined elsewhere; the function indexes both with `lsl #2`,
@ i.e. as arrays of 32-bit words.
53.extern gai4_ihevc_qp_table
54.extern gai4_ihevc_tc_table
55.globl ihevc_deblk_chroma_vert_a9q
56
@ Each word below holds (table address) - (label address + 8). The function
@ loads the word and executes `add rX,rX,pc` at the matching label
@ (ulbl1 / ulbl2). Assumes ARM (A32) state, where reading pc yields the
@ address of the current instruction + 8, so the sum is the absolute table
@ address without needing an absolute relocation in the code.
57gai4_ihevc_qp_table_addr:
58.long gai4_ihevc_qp_table - ulbl1 - 8
59
60gai4_ihevc_tc_table_addr:
61.long gai4_ihevc_tc_table  - ulbl2 - 8
62
63.type ihevc_deblk_chroma_vert_a9q, %function
64
@ ihevc_deblk_chroma_vert_a9q
@
@ For 4 consecutive rows, reads the 8 bytes starting at r0-4, computes a
@ clamped correction for each of the two interleaved byte streams (even
@ byte offsets use the "u" tc value, odd byte offsets the "v" tc value) and
@ conditionally writes back the two bytes on each side of r0.
@
@ Per row, with b[k] = byte at (row base + k) and k0 = 0 (u) or 1 (v):
@   delta   = clamp((((b[k0] - b[k0-2]) << 2) + b[k0-4] - b[k0+2] + 4) >> 3,
@                   -tc, +tc)
@   b[k0-2] = sat_u8(b[k0-2] + delta)      stored only if filter_p != 0
@   b[k0]   = sat_u8(b[k0]   - delta)      stored only if filter_q != 0
@
@ tc derivation (done once for u and once for v):
@   idx = qp_offset + ((r2 + r3 + 1) >> 1)
@   qpc = idx                        if idx < 0
@         gai4_ihevc_qp_table[idx]   if 0 <= idx <= 57
@         idx - 6                    if idx > 57
@   tc  = gai4_ihevc_tc_table[clamp(qpc + 2*tc_offset_div2 + 2, 0, 53)]
@
@ Inputs:
@   r0      pointer to the byte just right of the edge in row 0
@   r1      row stride in bytes
@   r2, r3  two QP values (only their sum is used)
@   stack   qp_offset_u, qp_offset_v, tc_offset_div2, filter_p, filter_q
@           (read at sp+40..sp+56 after the push below)
@   NOTE(review): presumably matches the C prototype of the non-NEON
@   ihevc_deblk_chroma_vert - confirm.
@
@ Registers: r4-r12 and lr are saved and restored. NEON registers written:
@ d0-d7, d16-d19, d30-d31 (d8-d15 are not touched).
65ihevc_deblk_chroma_vert_a9q:
66    push        {r4-r12,lr}               @ 10 regs = 40 bytes; stack inputs now at sp+40..
67    sub         r8,r0,#4                  @ r8 = start of the 8-byte window of row 0
68    add         r2,r2,r3                  @ r2 = sum of the two QP values
69    vld1.8      {d5},[r8],r1              @ d5  = row 0 (8 bytes), step to next row
70    add         r2,r2,#1                  @ +1 so the later asr #1 is a rounded average
71    vld1.8      {d17},[r8],r1             @ d17 = row 1
72    ldr         r7,[sp,#qp_offset_u_offset]
73    vld1.8      {d16},[r8],r1             @ d16 = row 2
74    ldr         r4,[sp,#filter_q_offset]  @ r4 = filter_q (tested before the second store group)
75    vld1.8      {d4},[r8]                 @ d4  = row 3
76    ldr         r5,[sp,#tc_offset_div2_offset]
77    vtrn.8      d5,d17                    @ start of the 4x8 byte transpose (rows -> columns)
78    adds        r3,r7,r2,asr #1           @ r3 = u index; N flag consumed by bmi below
79    vtrn.8      d16,d4
80    ldr         r7,gai4_ihevc_qp_table_addr   @ PC-relative displacement of the qp table
81ulbl1:
82    add         r7,r7,pc                  @ r7 = &gai4_ihevc_qp_table (pc reads as ulbl1 + 8)
83    ldr         r12,[sp,#filter_p_offset] @ r12 = filter_p (tested before the first store group)
84    ldr         r6,[sp,#qp_offset_v_offset]
85    bmi         l1.2944                   @ negative u index: keep it as is (ldr/add above leave flags intact)
86    cmp         r3,#0x39                  @ 0x39 = 57
87    ldrle       r3,[r7,r3,lsl #2]         @ index <= 57: take the qp table entry
88    subgt       r3,r3,#6                  @ index  > 57: index - 6
89l1.2944:
90    vtrn.16     d5,d16
91    adds        r2,r6,r2,asr #1           @ r2 = v index; N flag consumed by bmi below
92    vtrn.16     d17,d4
93    bmi         l1.2964                   @ negative v index: keep it as is
94    cmp         r2,#0x39                  @ same mapping as for the u index
95    ldrle       r2,[r7,r2,lsl #2]
96    subgt       r2,r2,#6
97l1.2964:
98    vtrn.32     d5,d17                    @ d5 = byte columns 0,1 ; d17 = columns 4,5 (rows 0-3 each)
99    add         r3,r3,r5,lsl #1           @ r3 += 2 * tc_offset_div2
100    vtrn.32     d16,d4                    @ d16 = byte columns 2,3 ; d4 = columns 6,7
101    add         r6,r3,#2                  @ r6 = candidate tc index (u), scratch only
102    vmovl.u8    q9,d17                    @ q9 = columns 4,5 widened to 16 bit
103    cmp         r6,#0x35                  @ 0x35 = 53, upper bound of the tc index
104    movgt       r3,#0x35
105    bgt         l1.2996
106    adds        r6,r3,#2                  @ not above 53: use r3 + 2, or 0 if that is negative
107    addpl       r3,r3,#2
108    movmi       r3,#0
109l1.2996:                                   @ r3 = tc index (u) clamped to [0, 53]
110    vsubl.u8    q0,d17,d16                @ q0 = columns 4,5 - columns 2,3 (signed 16 bit)
111    ldr         r6,gai4_ihevc_tc_table_addr   @ PC-relative displacement of the tc table
112ulbl2:
113    add         r6,r6,pc                  @ r6 = &gai4_ihevc_tc_table (pc reads as ulbl2 + 8)
114    vshl.i16    q0,q0,#2                  @ * 4
115    add         r2,r2,r5,lsl #1           @ r2 += 2 * tc_offset_div2
116    add         r5,r2,#2                  @ r5 = candidate tc index (v), scratch only
117    vaddw.u8    q0,q0,d5                  @ + columns 0,1
118    cmp         r5,#0x35
119    ldr         r3,[r6,r3,lsl #2]         @ r3 = tc (u); ldr leaves the cmp flags intact
120    vsubw.u8    q2,q0,d4                  @ - columns 6,7
121    movgt       r2,#0x35
122    bgt         l1.3036
123    adds        r5,r2,#2
124    addpl       r2,r2,#2
125    movmi       r2,#0
126l1.3036:                                   @ r2 = tc index (v) clamped to [0, 53]
127
128
129    vrshr.s16   q3,q2,#3                  @ rounding shift: (x + 4) >> 3
130    vdup.16     d2,r3                     @ d2  = +tc (u) in lanes 0-3 of q1
131    ldr         r2,[r6,r2,lsl #2]         @ r2 = tc (v)
132    rsb         r3,r3,#0                  @ r3 = -tc (u)
133    cmp         r12,#0                    @ filter_p == 0 ?  flags are kept until the beq below
134    vdup.16     d3,r2                     @ d3  = +tc (v) in lanes 4-7 of q1
135    rsb         r2,r2,#0                  @ r2 = -tc (v)
136    vdup.16     d30,r3                    @ q15 = lower bounds: -tc (u) | -tc (v)
137    vdup.16     d31,r2
138
139
140    vmin.s16    q2,q3,q1                  @ clamp from above to +tc
141    vmax.s16    q1,q15,q2                 @ clamp from below to -tc; q1 = delta
142
143    vmovl.u8    q3,d16                    @ q3 = columns 2,3 widened to 16 bit
144
145    vadd.i16    q0,q3,q1                  @ columns 2,3 + delta
146    vsub.i16    q1,q9,q1                  @ columns 4,5 - delta
147    vqmovun.s16 d0,q0                     @ saturate to unsigned 8 bit
148    sub         r2,r0,#2                  @ r2 = address of column 2 in row 0 (no flag update)
149    vqmovun.s16 d1,q1
150    vtrn.32     d0,d1                     @ transpose back so every 16-bit lane is one row's
151    vtrn.8      d0,d1                     @ byte pair: d0 = rows 0,2 ; d1 = rows 1,3 ; lanes 0-1 = columns 2,3 ; lanes 2-3 = columns 4,5
152    beq         l1.3204                   @ filter_p == 0: skip the stores left of r0
153
154    vst1.16     {d0[0]},[r2],r1           @ row 0, columns 2,3
155    vst1.16     {d1[0]},[r2],r1           @ row 1
156    vst1.16     {d0[1]},[r2],r1           @ row 2
157    vst1.16     {d1[1]},[r2]              @ row 3
158l1.3204:
159    cmp         r4,#0                     @ filter_q == 0 ?
160    beq         l1.3228                   @ yes: skip the stores at r0
161    vst1.16     {d0[2]},[r0],r1           @ row 0, columns 4,5
162    vst1.16     {d1[2]},[r0],r1           @ row 1
163    vst1.16     {d0[3]},[r0],r1           @ row 2
164    vst1.16     {d1[3]},[r0]              @ row 3
165l1.3228:
166    pop         {r4-r12,pc}               @ restore saved registers and return
167
168
169
170