@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@/*****************************************************************************/
@/*                                                                           */
@/*  File Name         : ih264_deblk_luma_a9.s                                */
@/*                                                                           */
@/*  Description       : Contains function definitions for deblocking luma    */
@/*                      edge. Functions are coded in NEON assembly using     */
@/*                      GNU assembler (GAS) syntax.                          */
@/*                                                                           */
@/*  List of Functions : ih264_deblk_luma_vert_bs4_a9()                       */
@/*                      ih264_deblk_luma_vert_bslt4_a9()                     */
@/*                      ih264_deblk_luma_horz_bs4_a9()                       */
@/*                      ih264_deblk_luma_horz_bslt4_a9()                     */
@/*                      ih264_deblk_luma_vert_bs4_mbaff_a9()                 */
@/*                      ih264_deblk_luma_vert_bslt4_mbaff_a9()               */
@/*                                                                           */
@/*  Issues / Problems : None                                                 */
@/*                                                                           */
@/*  Revision History  :                                                      */
@/*                                                                           */
@/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
@/*         28 11 2013   Ittiam          Draft                                */
@/*         05 01 2015   Kaushik         Added double-call functions for      */
@/*                      Senthoor        vertical deblocking.                 */
@/*                                                                           */
@/*****************************************************************************/


.text
.p2align 2

@**
@*******************************************************************************
@*
@* @brief
@*     Performs filtering of a luma block horizontal edge for cases where the
@*     boundary strength is less than 4
@*
@* @par Description:
@*       This operation is described in Sec. 8.7.2.3 under the title
@*       "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
@*
@* @param[in] r0 - pu1_src
@*  Pointer to the src sample q0
@*
@* @param[in] r1 - src_strd
@*  Source stride
@*
@* @param[in] r2 - alpha
@*  Alpha Value for the boundary
@*
@* @param[in] r3 - beta
@*  Beta Value for the boundary
@*
@* @param[in] sp(0) - u4_bs
@*  Packed Boundary strength array
@*
@* @param[in] sp(4) - pu1_cliptab
@*  tc0_table
@*
@* @returns
@*  None
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*

.global ih264_deblk_luma_horz_bslt4_a9

ih264_deblk_luma_horz_bslt4_a9:

    @ On entry: r0 = pu1_src (row q0), r1 = src_strd, r2 = alpha, r3 = beta,
    @           [sp] = u4_bs, [sp, #4] = pu1_cliptab.
    @ Six 16-byte rows p2, p1, p0, q0, q1, q2 are loaded; only p1, p0, q0 and
    @ q1 are written back. r6 and r7 remember the p1 and p0 row addresses.

    stmfd         sp!, {r4-r7, lr}

    @ five registers (0x14 bytes) were just pushed, so the two stack
    @ arguments are now found at sp + 0x14 and sp + 0x18
    ldrd          r4, r5, [sp, #0x14]   @r4 = u4_bs, r5 = pu1_cliptab
    vpush         {d8 - d15}
    sub           r0, r0, r1, lsl #1    @r0 = pu1_src - 2 * src_strd
    sub           r0, r0, r1            @r0 = pu1_src - 3 * src_strd, pointer to p2
    rev           r4, r4                @byte-reverse u4_bs: its top byte becomes lane 0
    vld1.8        {q5}, [r0], r1        @p2 values are loaded into q5
    vmov.32       d12[0], r4            @d12[0] = byte-reversed u4_bs
    mov           r6, r0                @keeping backup of pointer to p1
    vld1.8        {q4}, [r0], r1        @p1 values are loaded into q4
    mov           r7, r0                @keeping backup of pointer to p0
    vld1.8        {q3}, [r0], r1        @p0 values are loaded into q3
    vmovl.u8      q6, d12               @each bS byte widened to a 16 bit lane
    vld1.8        {q0}, [r0], r1        @q0 values are loaded into q0
    vabd.u8       q13, q4, q3           @Q13 = ABS(p1 - p0)
    vld1.8        {q1}, [r0], r1        @q1 values are loaded into q1
    vabd.u8       q11, q3, q0           @Q11 = ABS(p0 - q0)
    vld1.32       d16[0], [r5]          @D16[0] = first 4 bytes of cliptab
    vabd.u8       q12, q1, q0           @Q12 = ABS(q1 - q0)
    vld1.8        {q2}, [r0], r1        @q2 values are loaded into q2
    vtbl.8        d14, {d16}, d12       @byte-wise lookup: even bytes of d14 = cliptab[bS]
    vdup.8        q10, r2               @Q10 contains alpha
    vdup.8        q8, r3                @Q8 contains beta
    vmovl.u16     q6, d12               @bS widened to one 32 bit lane per 4 pixels
    vmovl.u16     q7, d14               @looked-up clip values widened to 32 bit lanes
    vabd.u8       q14, q5, q3           @Q14 = Ap = ABS(p2 - p0)
    vabd.u8       q15, q2, q0           @Q15 = Aq = ABS(q2 - q0)
    vcgt.s32      q6, q6, #0            @Q6 = (bS > 0) mask per 32 bit lane
    vsli.32       q7, q7, #8            @copy the low byte (C0) into byte 1 of each lane
    vcge.u8       q9, q11, q10          @Q9 = ( ABS(p0 - q0) >= Alpha )
    vcge.u8       q12, q12, q8          @Q12=( ABS(q1 - q0) >= Beta )
    vcge.u8       q13, q13, q8          @Q13=( ABS(p1 - p0) >= Beta )
    vcgt.u8       q10, q8, q14          @Q10=(Ap<Beta)
    vcgt.u8       q11, q8, q15          @Q11=(Aq<Beta)
    vsli.32       q7, q7, #16           @Q7 = C0 replicated in all 4 bytes of each lane
    vorr          q9, q9, q12           @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
    vsubl.u8      q15, d1, d7           @
    vsubl.u8      q12, d0, d6           @Q15,Q12 = (q0 - p0)
    vorr          q9, q9, q13           @Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
    vsubl.u8      q14, d8, d2           @Q14 = (p1 - q1)L
    vshl.i16      q13, q15, #2          @Q13 = (q0 - p0)<<2
    vshl.i16      q12, q12, #2          @Q12 = (q0 - p0)<<2
    vsubl.u8      q15, d9, d3           @Q15 = (p1 - q1)H
    vbic          q6, q6, q9            @final condition: (bS > 0) and no threshold exceeded
    vadd.i16      q12, q12, q14         @
    vadd.i16      q13, q13, q15         @Q13,Q12 = [ (q0 - p0)<<2 ] + (p1 - q1)
    vsub.i8       q9, q7, q10           @Q9 = C0 + (Ap < Beta); a true mask is -1, so subtracting adds 1
    vrhadd.u8     q8, q3, q0            @Q8 = ((p0+q0+1) >> 1)
    vqrshrn.s16   d24, q12, #3          @
    vqrshrn.s16   d25, q13, #3          @Q12 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3
    vsub.i8       q9, q9, q11           @Q9 = C = C0 + (Ap < Beta) + (Aq < Beta)
    vand.i8       q10, q10, q6          @(Ap < Beta) restricted to pixels being filtered
    vand.i8       q11, q11, q6          @(Aq < Beta) restricted to pixels being filtered
    vabs.s8       q13, q12              @Q13 = ABS (i_macro)
    vaddl.u8      q14, d17, d11         @
    vaddl.u8      q5, d16, d10          @Q14,Q5 = p2 + (p0+q0+1)>>1
    vaddl.u8      q15, d17, d5          @
    vmin.u8       q9, q13, q9           @Q9 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro)
    vshll.u8      q13, d9, #1           @
    vaddl.u8      q2, d16, d4           @Q15,Q2 = q2 + (p0+q0+1)>>1
    vshll.u8      q8, d8, #1            @Q13,Q8 = (p1<<1)
    vand          q9, q9, q6            @delta forced to zero where pixels must not be filtered
    vsub.i16      q14, q14, q13         @Q14,Q5 = [p2 + (p0+q0+1)>>1] - (p1<<1)
    vsub.i16      q5, q5, q8            @
    vshll.u8      q8, d2, #1            @
    vshll.u8      q13, d3, #1           @Q13,Q8 = (q1<<1)
    vqshrn.s16    d29, q14, #1          @
    vqshrn.s16    d28, q5, #1           @Q14 = i_macro_p1
    vsub.i16      q2, q2, q8            @
    vsub.i16      q15, q15, q13         @Q15,Q2 = [q2 + (p0+q0+1)>>1] - (q1<<1)
    vneg.s8       q13, q7               @Q13 = -C0
    vmin.s8       q14, q14, q7          @Q14 = min(C0,i_macro_p1)
    vcge.s8       q12, q12, #0          @Q12 = (i_macro >= 0)
    vqshrn.s16    d31, q15, #1          @
    vqshrn.s16    d30, q2, #1           @Q15 = i_macro_q1
    vmax.s8       q14, q14, q13         @Q14 = max( - C0 , min(C0, i_macro_p1) )
    vqadd.u8      q8, q3, q9            @Q8 = p0 + delta
    vqsub.u8      q3, q3, q9            @Q3 = p0 - delta
    vmin.s8       q15, q15, q7          @Q15 = min(C0,i_macro_q1)
    vand.i8       q14, q10, q14         @p1 adjustment kept only where Ap < Beta
    vqadd.u8      q7, q0, q9            @Q7 = q0 + delta
    vqsub.u8      q0, q0, q9            @Q0 = q0 - delta
    vmax.s8       q15, q15, q13         @Q15 = max( - C0 , min(C0, i_macro_q1) )
    vbif          q8, q3, q12           @Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta)
    vbif          q0, q7, q12           @Q0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta)
    vadd.i8       q14, q14, q4          @Q14 = p1 + masked p1 adjustment
    vand.i8       q15, q11, q15         @q1 adjustment kept only where Aq < Beta
    vst1.8        {q8}, [r7], r1        @writing back filtered value of p0
    vadd.i8       q15, q15, q1          @Q15 = q1 + masked q1 adjustment
    vst1.8        {q0}, [r7], r1        @writing back filtered value of q0
    vst1.8        {q14}, [r6]           @writing back filtered value of p1
    vst1.8        {q15}, [r7], r1       @writing back filtered value of q1
    vpop          {d8 - d15}
    ldmfd         sp!, {r4-r7, pc}



@**
@*******************************************************************************
@*
@* @brief
@*     Performs filtering of a luma block horizontal edge when the
@*     boundary strength is set to 4
@*
@* @par Description:
@*       This operation is described in Sec. 8.7.2.4 under the title
@*       "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
@*
@* @param[in] r0 - pu1_src
@*  Pointer to the src sample q0
@*
@* @param[in] r1 - src_strd
@*  Source stride
@*
@* @param[in] r2 - alpha
@*  Alpha Value for the boundary
@*
@* @param[in] r3 - beta
@*  Beta Value for the boundary
@*
@* @returns
@*  None
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*

.global ih264_deblk_luma_horz_bs4_a9

ih264_deblk_luma_horz_bs4_a9:

    @ On entry: r0 = pu1_src (row q0), r1 = src_strd, r2 = alpha, r3 = beta.
    @ Rows p3..q3 are read; p2, p1, p0, q0, q1 and q2 are stored back, each
    @ pixel keeping its original value where the filter conditions fail.
    @ After alpha and beta are duplicated into q0 and q1, r2 and r3 are
    @ reused as pointers to the p3 and p2 rows.

    @ Back up necessary registers on stack
    stmfd         sp!, {r12, r14}
    vpush         {d8 - d15}
    @ Init
    vdup.8        q0, r2                @duplicate alpha
    sub           r12, r0, r1           @pointer to p0 = q0 - src_strd
    vdup.8        q1, r3                @duplicate beta
    sub           r14, r0, r1, lsl#1    @pointer to p1 = q0 - src_strd*2
    sub           r2, r0, r1, lsl#2     @pointer to p3 = q0 - src_strd*4
    sub           r3, r14, r1           @pointer to p2 = p1 - src_strd

    @ Load Data
    vld1.8        {d4, d5}, [r0], r1    @load q0 to Q2, r0 += src_strd
    vld1.8        {d6, d7}, [r12]       @load p0 to Q3
    vld1.8        {d8, d9}, [r0], r1    @load q1 to Q4, r0 += src_strd
    vld1.8        {d10, d11}, [r14]     @load p1 to Q5

    @ Filter Decision
    vabd.u8       q6, q2, q3            @ABS(p0 - q0)
    vabd.u8       q7, q4, q2            @ABS(q1 - q0)
    vabd.u8       q8, q5, q3            @ABS(p1 - p0)
    vcge.u8       q9, q6, q0            @ABS(p0 - q0) >= Alpha
    vcge.u8       q7, q7, q1            @ABS(q1 - q0) >= Beta
    vcge.u8       q8, q8, q1            @ABS(p1 - p0) >= Beta
    vmov.i8       q10, #2
    vorr          q9, q9, q7            @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta
    vld1.8        {d14, d15}, [r0], r1  @load q2 to Q7, r0 += src_strd
    vorr          q9, q9, q8            @Q9 = ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta
    vsra.u8       q10, q0, #2           @((Alpha >> 2) + 2)
    vabd.u8       q11, q7, q2           @Aq = ABS(q2 - q0)
    vaddl.u8      q12, d4, d6           @p0+q0 L
    vaddl.u8      q13, d5, d7           @p0+q0 H
    vclt.u8       q11, q11, q1          @Aq < Beta
    vclt.u8       q10, q6, q10          @(ABS(p0 - q0) <((Alpha >>2) + 2))

    @ Deblock Filtering q0', q1', q2'
    vaddw.u8      q14, q12, d8          @p0+q0+q1 L
    vaddw.u8      q15, q13, d9          @p0+q0+q1 H
    vand          q11, q11, q10         @(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
    @ q0' if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) TRUE
    vadd.i16      q8, q14, q14          @2*(p0+q0+q1)L
    vadd.i16      q0, q15, q15          @2*(p0+q0+q1)H
    vaddw.u8      q8, q8, d14           @2*(p0+q0+q1)+q2 L
    vaddw.u8      q0, q0, d15           @2*(p0+q0+q1)+q2 H
    vaddw.u8      q8, q8, d10           @2*(p0+q0+q1)+q2 +p1 L
    vaddw.u8      q0, q0, d11           @2*(p0+q0+q1)+q2 +p1 H
    vrshrn.u16    d12, q8, #3           @(2*(p0+q0+q1)+q2 +p1 +4)>> 3 L [q0']
    vrshrn.u16    d13, q0, #3           @(2*(p0+q0+q1)+q2 +p1 +4)>> 3 H [q0']
    @ q0" if (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) FALSE
    vaddl.u8      q8, d8, d8            @2*q1 L
    vaddl.u8      q0, d9, d9            @2*q1 H
    vaddw.u8      q8, q8, d4            @2*q1+q0 L
    vaddw.u8      q0, q0, d5            @2*q1+q0 H
    vaddw.u8      q8, q8, d10           @2*q1+q0+p1 L
    vaddw.u8      q0, q0, d11           @2*q1+q0+p1 H
    vrshrn.u16    d16, q8, #2           @(2*q1+q0+p1+2)>>2 L [q0"]
    vrshrn.u16    d17, q0, #2           @(2*q1+q0+p1+2)>>2 H [q0"]
    @ q1'
    vaddw.u8      q14, q14, d14         @p0+q0+q1+q2 L
    vaddw.u8      q15, q15, d15         @p0+q0+q1+q2 H
    vld1.8        {q0}, [r0], r1        @load q3 to Q0, r0 += src_strd
    vbit          q8, q6, q11           @choosing between q0' and q0" depending on condn
    sub           r0, r0, r1, lsl #2    @r0 rewound by 4 rows: pointer to q0 again
    vbic          q11, q11, q9          @!(ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)
                                        @ && (Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
    vrshrn.u16    d12, q14, #2          @(p0+q0+q1+q2+2)>>2 L [q1']
    vrshrn.u16    d13, q15, #2          @(p0+q0+q1+q2+2)>>2 H [q1']
    vbif          q2, q8, q9            @choose q0 or filtered q0
    @ q2'
    vaddl.u8      q8, d14, d0           @q2+q3,L
    vaddl.u8      q0, d15, d1           @q2+q3,H
    vadd.i16      q14, q14, q8          @p0+q0+q1+2*q2+q3 L
    vst1.8        {d4, d5}, [r0], r1    @store q0
    vadd.i16      q15, q15, q0          @p0+q0+q1+2*q2+q3 H
    vadd.i16      q14, q14, q8          @p0+q0+q1+3*q2+2*q3 L
    vadd.i16      q15, q15, q0          @p0+q0+q1+3*q2+2*q3 H
    vrshrn.u16    d0, q14, #3           @(p0+q0+q1+3*q2+2*q3+4)>>3 L [q2']
    vrshrn.u16    d1, q15, #3           @(p0+q0+q1+3*q2+2*q3+4)>>3 H [q2']
    vld1.8        {d30, d31}, [r3]      @load p2 to Q15
    vbif          q6, q4, q11           @choose q1 or filtered value of q1

    vabd.u8       q8, q15, q3           @Ap,ABS(p2 - p0)
    vaddw.u8      q12, q12, d10         @p0+q0+p1 L
    vbif          q0, q7, q11           @choose q2 or filtered q2
    vaddw.u8      q13, q13, d11         @p0+q0+p1 H
    vst1.8        {d12, d13}, [r0], r1  @store q1
    vclt.u8       q8, q8, q1            @Ap < Beta
    vadd.i16      q14, q12, q12         @2*(p0+q0+p1) L
    vadd.i16      q2, q13, q13          @2*(p0+q0+p1) H
    vst1.8        {d0, d1}, [r0], r1    @store q2
    vand          q10, q10, q8          @((Ap < Beta) && (ABS(p0 - q0) <((Alpha >>2) + 2)))
    vaddw.u8      q14, q14, d30         @2*(p0+q0+p1)+p2 L
    vaddw.u8      q2, q2, d31           @2*(p0+q0+p1)+p2 H
    vaddw.u8      q14, q14, d8          @2*(p0+q0+p1)+p2+q1 L
    vaddw.u8      q2, q2, d9            @2*(p0+q0+p1)+p2+q1 H
    vrshrn.u16    d28, q14, #3          @(2*(p0+q0+p1)+p2+q1+4)>>3 L,p0'
    vrshrn.u16    d29, q2, #3           @(2*(p0+q0+p1)+p2+q1+4)>>3 H,p0'
    vmov.i8       d0, #2                @8 bit multiplier 2 for the vmlal below
    vmov.i16      d1, #2                @16 bit multiplier 2 for the vmla below
    vaddl.u8      q1, d6, d8            @p0+q1 L
    vmlal.u8      q1, d10, d0           @2*p1+p0+q1 L
    vaddl.u8      q8, d7, d9            @p0+q1 H
    vmlal.u8      q8, d11, d0           @2*p1+p0+q1 H
    vaddw.u8      q6, q12, d30          @(p0+q0+p1) +p2 L
    vld1.8        {d24, d25}, [r2]      @load p3,Q12
    vaddw.u8      q2, q13, d31          @(p0+q0+p1) +p2 H
    vaddl.u8      q4, d30, d24          @p2+p3 L
    vrshrn.u16    d26, q6, #2           @((p0+q0+p1)+p2 +2)>>2,p1' L
    vrshrn.u16    d2, q1, #2            @(2*p1+p0+q1+2)>>2,p0"L
    vrshrn.u16    d27, q2, #2           @((p0+q0+p1)+p2 +2)>>2,p1' H
    vrshrn.u16    d3, q8, #2            @(2*p1+p0+q1+2)>>2,p0" H
    vaddl.u8      q8, d31, d25          @p2+p3 H
    vmla.u16      q6, q4, d1[0]         @(p0+q0+p1)+3*p2+2*p3 L
    vmla.u16      q2, q8, d1[0]         @(p0+q0+p1)+3*p2+2*p3 H
    vbic          q8, q10, q9           @!(ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta)
                                        @ && (Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2))
    vbit          q1, q14, q10          @choosing between p0' and p0"
    vrshrn.u16    d12, q6, #3           @((p0+q0+p1)+3*p2+2*p3+4)>>3 L p2'
    vrshrn.u16    d13, q2, #3           @((p0+q0+p1)+3*p2+2*p3+4)>>3 H p2'
    vbif          q3, q1, q9            @choosing between p0 and filtered value of p0
    vbit          q5, q13, q8           @choosing between p1 and p1'
    vbit          q15, q6, q8           @choosing between p2 and p2'
    vst1.8        {d6, d7}, [r12]       @store p0
    vst1.8        {d10, d11}, [r14]     @store p1
    vst1.8        {d30, d31}, [r3]      @store p2
    vpop          {d8 - d15}
    ldmfd         sp!, {r12, pc}



@**
@*******************************************************************************
@*
@* @brief
@*     Performs filtering of a luma block vertical edge for cases where the
@*     boundary strength is less than 4
@*
@* @par Description:
@*       This operation is described in Sec.
8.7.2.3 under the title 3658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* "Filtering process for edges with bS less than 4" in ITU T Rec H.264. 3668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] r0 - pu1_src 3688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Pointer to the src sample q0 3698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] r1 - src_strd 3718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Source stride 3728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] r2 - alpha 3748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Alpha Value for the boundary 3758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] r3 - beta 3778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Beta Value for the boundary 3788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] sp(0) - u4_bs 3808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Packed Boundary strength array 3818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] sp(4) - pu1_cliptab 3838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* tc0_table 3848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @returns 3868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* None 3878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @remarks 3898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* None 3908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 3918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha
@*******************************************************************************
@*

@ -----------------------------------------------------------------------------
@ ih264_deblk_luma_vert_bslt4_a9
@
@ Filters the 16 rows of a vertical luma edge with the boundary-strength < 4
@ filter.  Eight pixels per row (p3 p2 p1 p0 | q0 q1 q2 q3) are loaded,
@ transposed so that every pixel position sits in its own Q register,
@ filtered, transposed back and stored.
@
@ In:   r0       = pu1_src      (points at q0; 4 is subtracted to reach p3)
@       r1       = src_strd
@       r2       = alpha
@       r3       = beta
@       [sp]     = u4_bs        (four packed 8-bit bS values)
@       [sp, #4] = pu1_cliptab  (only the first four bytes are read)
@
@ Register roles after the forward transpose:
@       q0 = p3, q1 = p2, q2 = p1, q3 = p0, q4 = q0, q5 = q1, q6 = q2, q7 = q3
@       (low D register = rows 1-8, high D register = rows 9-16)
@
@ Saved/restored: r12, d8-d15.  lr is pushed and popped straight into pc.
@ r0 is left pointing 16 rows below (pu1_src - 4) on return.
@ -----------------------------------------------------------------------------

    .global ih264_deblk_luma_vert_bslt4_a9

ih264_deblk_luma_vert_bslt4_a9:

    stmfd         sp!, {r12, lr}

    sub           r0, r0, #4                  @r0 = pu1_src - 4, i.e. address of p3 in row 1
    ldr           r12, [sp, #8]               @r12 = u4_bs (first stack argument, above the two pushed words)
    ldr           r14, [sp, #12]              @r14 = pu1_cliptab (second stack argument)
    vpush         {d8 - d15}
    @loading p3:p2:p1:p0:q0:q1:q2:q3 for every row
    vld1.8        {d0}, [r0], r1              @row1
    vld1.8        d2, [r0], r1                @row2
    vld1.8        d4, [r0], r1                @row3
    rev           r12, r12                    @byte-reverse u4_bs so its most significant byte becomes lane 0
    vld1.8        d6, [r0], r1                @row4
    vmov.32       d18[0], r12                 @d18[0] = byte-reversed u4_bs
    vld1.32       d16[0], [r14]               @d16[0] = first four bytes of the clip table
    vld1.8        d8, [r0], r1                @row5
    vmovl.u8      q9, d18                     @d18 = the four bS values, one per 16-bit lane
    vld1.8        d10, [r0], r1               @row6
    vld1.8        d12, [r0], r1               @row7
    vtbl.8        d16, {d16}, d18             @even bytes = cliptab[bS]; odd bytes use index 0 and are overwritten below
    vld1.8        d14, [r0], r1               @row8
    vld1.8        d1, [r0], r1                @row9
    vmovl.u16     q8, d16                     @one 32-bit lane per bS value
    vld1.8        d3, [r0], r1                @row10
    vld1.8        d5, [r0], r1                @row11
    vld1.8        d7, [r0], r1                @row12
    vsli.32       q8, q8, #8                  @byte 1 of each lane = cliptab[bS]
    vld1.8        d9, [r0], r1                @row13
    vld1.8        d11, [r0], r1               @row14
    vld1.8        d13, [r0], r1               @row15
    vsli.32       q8, q8, #16                 @q8 = C0: cliptab[bS] in all four bytes of each lane (4 rows per bS)
    vld1.8        d15, [r0], r1               @row16

    @taking two 8x8 transposes
    @2X2 transposes
    vtrn.8        d0, d2                      @row1 & 2
    vtrn.8        d4, d6                      @row3 & 4
    vtrn.8        d8, d10                     @row5 & 6
    vtrn.8        d12, d14                    @row7 & 8
    vtrn.8        d1, d3                      @row9 & 10
    vtrn.8        d5, d7                      @row11 & 12
    vtrn.8        d9, d11                     @row13 & 14
    vtrn.8        d13, d15                    @row15 & 16
    @4x4 transposes
    vtrn.16       d2, d6                      @row2 & 4
    vtrn.16       d10, d14                    @row6 & 8
    vtrn.16       d3, d7                      @row10 & 12
    vtrn.16       d11, d15                    @row14 & 16
    vtrn.32       d6, d14                     @row4 & 8
    vtrn.32       d7, d15                     @row12 & 16

    @now Q3 ->p0 and Q7->q3
    vtrn.16       d0, d4                      @row1 & 3
    vtrn.16       d8, d12                     @row5 & 7
    vtrn.16       d1, d5                      @row9 & 11
    vtrn.16       d9, d13                     @row13 & 15
    vtrn.32       d0, d8                      @row1 & 5
    vtrn.32       d1, d9                      @row9 & 13

    @now Q0->p3 & Q4->q0
    @starting processing as p0 and q0 are now ready
    vtrn.32       d2, d10                     @row2 & 6
    vrhadd.u8     q10, q3, q4                 @((p0 + q0 + 1) >> 1)
    vtrn.32       d3, d11                     @row10 & 14
    vmov.i8       d19, #2                     @multiplier for 2*p1 (d18 still holds the bS values)
    @now Q1->p2 & Q5->q1
    vtrn.32       d4, d12                     @row3 & 7
    vabd.u8       q11, q3, q4                 @ABS(p0 - q0)
    vtrn.32       d5, d13                     @row11 & 15
    vaddl.u8      q12, d20, d2                @(p2 + ((p0 + q0 + 1) >> 1)) L
    @now Q2->p1, Q6->q2
    vaddl.u8      q13, d21, d3                @(p2 + ((p0 + q0 + 1) >> 1)) H
    vmlsl.u8      q12, d4, d19                @(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) L
    vmlsl.u8      q13, d5, d19                @(p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) H
    vdup.8        q14, r2                     @alpha
    vcle.u8       q11, q14, q11               @ABS(p0 - q0) >= Alpha (Alpha <= ABS(p0 - q0))
    vdup.i8       q14, r3                     @beta
    vabd.u8       q15, q5, q4                 @ABS(q1 - q0)
    vqshrn.s16    d24, q12, #1                @((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) L
    vqshrn.s16    d25, q13, #1                @((p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1) H
    vcge.u8       q15, q15, q14               @ABS(q1 - q0) >= Beta
    vabd.u8       q13, q2, q3                 @ABS(p1 - p0)
    vmin.s8       q12, q12, q8                @min(deltap1, C0)
    vorr          q11, q11, q15               @ABS(q1 - q0) >= Beta || ABS(p0 - q0) >= Alpha
    vneg.s8       q15, q8                     @-C0
    vcge.u8       q13, q13, q14               @ABS(p1 - p0) >= Beta
    vmax.s8       q12, q12, q15               @max(deltap1, -C0)
    vorr          q11, q11, q13               @ABS(p0 - q0) >= Alpha || ABS(q1 - q0) >= Beta || ABS(p1 - p0) >= Beta
    vmovl.u16     q13, d18                    @bS, one value per 32-bit lane (4 rows)
    vaddl.u8      q9, d20, d12                @q2 + ((p0 + q0 + 1) >> 1) L
    vceq.u32      q13, q13, #0                @bS == 0
    vsubw.u8      q9, q9, d10                 @(q2 + ((p0 + q0 + 1) >> 1) - q1) L
    vaddl.u8      q10, d21, d13               @q2 + ((p0 + q0 + 1) >> 1) H
    vsubw.u8      q9, q9, d10                 @(q2 + ((p0 + q0 + 1) >> 1) - 2*q1) L
    vsubw.u8      q10, q10, d11               @(q2 + ((p0 + q0 + 1) >> 1) - q1) H
    vorr          q13, q13, q11               @q13 = skip mask: bS == 0 || any of the Alpha/Beta tests above failed
    vsubw.u8      q10, q10, d11               @(q2 + ((p0 + q0 + 1) >> 1) - 2*q1) H
    vqshrn.s16    d18, q9, #1                 @((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) L
    vabd.u8       q11, q1, q3                 @Ap = ABS(p2 - p0)
    vqshrn.s16    d19, q10, #1                @((q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1) H
    vabd.u8       q10, q6, q4                 @Aq = ABS(q2 - q0)
    vclt.u8       q11, q11, q14               @Ap < Beta
    vmin.s8       q9, q9, q8                  @min(deltaq1, C0)
    vclt.u8       q10, q10, q14               @Aq < Beta
    vsubl.u8      q14, d8, d6                 @(q0 - p0) L
    vmax.s8       q9, q9, q15                 @max(deltaq1, -C0)
    vsubl.u8      q15, d9, d7                 @(q0 - p0) H
    vshl.s16      q14, q14, #2                @(q0 - p0) << 2 L
    vsub.u8       q8, q8, q11                 @C0 + (Ap < Beta): subtracting the all-ones mask adds 1
    vshl.s16      q15, q15, #2                @(q0 - p0) << 2 H
    vaddw.u8      q14, q14, d4                @((q0 - p0) << 2) + p1 L
    vaddw.u8      q15, q15, d5                @((q0 - p0) << 2) + p1 H
    vsubw.u8      q14, q14, d10               @((q0 - p0) << 2) + (p1 - q1) L
    vsubw.u8      q15, q15, d11               @((q0 - p0) << 2) + (p1 - q1) H
    vbic          q11, q11, q13               @final condition for p1: Ap < Beta and not skipped
    vrshrn.s16    d28, q14, #3                @delta = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) L
    vrshrn.s16    d29, q15, #3                @delta = ((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3) H
    vsub.u8       q8, q8, q10                 @C = C0 + (Ap < Beta) + (Aq < Beta)
    vbic          q10, q10, q13               @final condition for q1: Aq < Beta and not skipped
    vabs.s8       q15, q14                    @abs(delta)
    vand          q12, q12, q11               @deltap1, zeroed where p1 is not modified
    vand          q9, q9, q10                 @deltaq1, zeroed where q1 is not modified
    vmin.u8       q15, q15, q8                @min(abs(delta), C)
    vadd.i8       q2, q2, q12                 @p1 + deltap1
    vadd.i8       q5, q5, q9                  @q1 + deltaq1
    vbic          q15, q15, q13               @abs(delta) kept only for pixels to be changed
    vcge.s8       q14, q14, #0                @mask: delta >= 0
    vqsub.u8      q11, q3, q15                @clip(p0 - abs(delta))
    vtrn.8        d0, d2                      @row1 & 2
    vqadd.u8      q3, q3, q15                 @clip(p0 + abs(delta))
    vtrn.8        d1, d3                      @row9 & 10
    vqadd.u8      q12, q4, q15                @clip(q0 + abs(delta))
    vtrn.8        d12, d14                    @row7 & 8
    vqsub.u8      q4, q4, q15                 @clip(q0 - abs(delta))
    vtrn.8        d13, d15                    @row15 & 16
    vbif          q3, q11, q14                @p0: take the subtracted value where delta < 0
    vbif          q4, q12, q14                @q0: take the added value where delta < 0
    vtrn.8        d4, d6                      @row3 & 4
    vtrn.8        d8, d10                     @row5 & 6
    vtrn.8        d5, d7                      @row11 & 12
    vtrn.8        d9, d11                     @row13 & 14
    vtrn.16       d2, d6                      @row2 & 4
    vtrn.16       d10, d14                    @row6 & 8
    vtrn.16       d3, d7                      @row10 & 12
    vtrn.16       d11, d15                    @row14 & 16
    vtrn.32       d6, d14                     @row4 & 8
    vtrn.32       d7, d15                     @row12 & 16
    @the same vtrn sequence as on entry turns the columns back into rows
    vtrn.16       d0, d4                      @row1 & 3
    vtrn.16       d8, d12                     @row5 & 7
    vtrn.16       d1, d5                      @row9 & 11
    vtrn.16       d9, d13                     @row13 & 15
    sub           r0, r0, r1, lsl#4           @rewind r0 by 16 rows to row 1
    vtrn.32       d0, d8                      @row1 & 5
    vtrn.32       d1, d9                      @row9 & 13
    vtrn.32       d2, d10                     @row2 & 6
    vtrn.32       d3, d11                     @row10 & 14
    vtrn.32       d4, d12                     @row3 & 7
    vtrn.32       d5, d13                     @row11 & 15
    vst1.8        {d0}, [r0], r1              @row1
    vst1.8        d2, [r0], r1                @row2
    vst1.8        d4, [r0], r1                @row3
    vst1.8        d6, [r0], r1                @row4
    vst1.8        d8, [r0], r1                @row5
    vst1.8        d10, [r0], r1               @row6
    vst1.8        d12, [r0], r1               @row7
    vst1.8        d14, [r0], r1               @row8
    vst1.8        d1, [r0], r1                @row9
    vst1.8        d3, [r0], r1                @row10
    vst1.8        d5, [r0], r1                @row11
    vst1.8        d7, [r0], r1                @row12
    vst1.8        d9, [r0], r1                @row13
    vst1.8        d11, [r0], r1               @row14
    vst1.8        d13, [r0], r1               @row15
    vst1.8        d15, [r0], r1               @row16
    vpop          {d8 - d15}
    ldmfd         sp!, {r12, pc}



@**
@*******************************************************************************
@*
@* @brief
@*     Performs filtering of a luma block vertical edge when the
@*     boundary strength is set to 4
@*
@* @par Description:
@*       This operation is described in  Sec. 8.7.2.4 under the title
@*       "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
5878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 5888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] r0 - pu1_src 5898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Pointer to the src sample q0 5908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 5918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] r1 - src_strd 5928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Source stride 5938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 5948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] r2 - alpha 5958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Alpha Value for the boundary 5968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 5978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @param[in] r3 - beta 5988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Beta Value for the boundary 5998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 6008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @returns 6018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* None 6028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 6038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @remarks 6048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* None 6058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 6068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@******************************************************************************* 6077497191460a9504f8b4f64df169ab633f0b74353Harish Mahendrakar@* 6088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S .global ih264_deblk_luma_vert_bs4_a9 6108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Sih264_deblk_luma_vert_bs4_a9: 6128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S stmfd sp!, {r12, lr} 6148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha 
S vpush {d8 - d15} 6158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sub r0, r0, #4 @pointer uc_edgePixel-4 6168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @loading p3:p2:p1:p0:q0:q1:q2:q3 for every row 6178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d0, [r0], r1 @row1 6188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d2, [r0], r1 @row2 6198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d4, [r0], r1 @row3 6208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d6, [r0], r1 @row4 6218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d8, [r0], r1 @row5 6228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d10, [r0], r1 @row6 6238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d12, [r0], r1 @row7 6248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d14, [r0], r1 @row8 6258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d1, [r0], r1 @row9 6268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d3, [r0], r1 @row10 6278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d5, [r0], r1 @row11 6288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d7, [r0], r1 @row12 6298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d9, [r0], r1 @row13 6308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d11, [r0], r1 @row14 6318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d13, [r0], r1 @row15 6328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vld1.8 d15, [r0], r1 @row16 6338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @taking two 8x8 transposes 6348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @2X2 transposes 6358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d0, d2 @row1 &2 6368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d4, d6 @row3&row4 6378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d8, d10 @row5&6 6388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 
d12, d14 @row7 & 8 6398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d1, d3 @row9 &10 6408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d5, d7 @row11 & 12 6418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d9, d11 @row13 &14 6428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d13, d15 @row15 & 16 6438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @4x4 transposes 6448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d2, d6 @row2 & row4 6458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d10, d14 @row6 & row8 6468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d3, d7 @row10 & 12 6478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d11, d15 @row14 & row16 6488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d6, d14 @row4 & 8 6498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d7, d15 @row 12 & 16 6508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @now Q3 ->p0 and Q7->q3 6518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d0, d4 @row1 & 3 6528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d8, d12 @row 5 & 7 6538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d1, d5 @row9 & row11 6548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d9, d13 @row13 & row15 6558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d0, d8 @row1 & row5 6568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d1, d9 @row9 & 13 6578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @now Q0->p3 & Q4->q0 6588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @starting processing as p0 and q0 are now ready 6598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @now Q1->p2 & Q5->q1 6608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vpush {q7} @saving in stack 6618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d4, d12 @row3 & 7 6628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmov.i16 
q14, #2 6638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d5, d13 @row11 & row15 6648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q8, d6, d8 @p0+q0 L 6658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d2, d10 @row2 &6 6668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q9, d7, d9 @p0+q0 H 6678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d3, d11 @row10&row14 6688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q10, q8, d4 @p0+q0+p1 L 6698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q11, q9, d5 @p0+q0+p1 H 6708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q12, d2, d10 @p2+q1 L 6718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q13, d3, d11 @p2+q1 H 6728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmla.u16 q12, q10, q14 @p2 + X2(p1) + X2(p0) + X2(q0) + q1 L 6738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmla.u16 q13, q11, q14 @p2 + X2(p1) + X2(p0) + X2(q0) + q1 H 6748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmov.i8 q14, #2 6758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q8, q10, d2 @p0+q0+p1+p2 L 6768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q9, q11, d3 @p0+q0+p1+p2 H 6778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vdup.i8 q15, r2 @duplicate alpha 6788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d20, q8, #2 @(p2 + p1 + p0 + q0 + 2) >> 2)L p1' 6798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d21, q9, #2 @(p2 + p1 + p0 + q0 + 2) >> 2)H p1' 6808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vabd.u8 q11, q3, q4 @ABD(p0-q0) 6818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vsra.u8 q14, q15, #2 @alpha >>2 +2 6828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vabd.u8 q15, q1, q3 @Ap = ABD(p2-p0) 6838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d24, q12, #3 @((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) L p0' 
6848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d25, q13, #3 @((p2 + X2(p1) + X2(p0) + X2(q0) + q1 + 4) >> 3) H p0' 6858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vdup.i8 q13, r3 @beta 6868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vcgt.u8 q14, q14, q11 @ABS(p0 - q0) <((Alpha >>2) + 2) 6878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q11, d6, d10 @p0+q1 L 6888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vcgt.u8 q7, q13, q15 @beta>Ap 6898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q15, d7, d11 @p0+q1 H 6908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q11, q11, d4 @p0+q1+p1 L 6918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q15, q15, d5 @p0+q1+p1 H 6928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q11, q11, d4 @p0+q1+2*p1 L 6938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q15, q15, d5 @p0+q1+2*p1 H 6948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vand q7, q7, q14 @(Ap < Beta && ABS(p0 - q0) <((Alpha >>2) + 2) 6958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d22, q11, #2 @((X2(p1) + p0 + q1 + 2) >> 2) L p0" 6968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d23, q15, #2 @((X2(p1) + p0 + q1 + 2) >> 2) H p0" 6978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q15, d2, d0 @p2+p3 L 6988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbif q12, q11, q7 @p0' or p0 " 6998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q11, d3, d1 @p2+p3 H 7008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vadd.u16 q15, q15, q15 @2*(p2+p3) L 7018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vadd.u16 q11, q11, q11 @2*(p2+p3)H 7028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vadd.u16 q8, q8, q15 @(X2(p3) + X3(p2) + p1 + p0 + q0) L 7038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vadd.u16 q9, q9, q11 @(X2(p3) + X3(p2) + p1 + p0 + q0) H 
7048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vabd.u8 q15, q6, q4 @Aq = abs(q2-q0) 7058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vabd.u8 q11, q5, q4 @ABS(Q1-Q0) 7068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d16, q8, #3 @((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); L p2' 7078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d17, q9, #3 @((X2(p3) + X3(p2) + p1 + p0 + q0 + 4) >> 3); H p2' 7088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vabd.u8 q9, q2, q3 @ABS(p1-p0) 7098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vcgt.u8 q15, q13, q15 @Aq < Beta 7108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vcge.u8 q11, q11, q13 @ABS(q1 - q0) >= Beta 7118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vcge.u8 q9, q9, q13 @ABS(p1 - p0) >= beta 7128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vdup.i8 q13, r2 @duplicate alpha 7138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vand q15, q15, q14 @(Aq < Beta && ABS(p0 - q0) <((Alpha >>2) + 2)) 7148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vabd.u8 q14, q3, q4 @abs(p0-q0) 7158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vorr q11, q11, q9 @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta 7168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q9, d6, d8 @p0+q0 L 7178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vcge.u8 q14, q14, q13 @ABS(p0 - q0) >= Alpha 7188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q13, d7, d9 @p0+q0 H 7198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q9, q9, d10 @p0+q0+q1 L 7208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vorr q11, q11, q14 @ABS(p1 - p0) >= Beta || ABS(q1 - q0) >= Beta||ABS(p0 - q0) >= Alpha 7218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q13, q13, d11 @p0+q0+q1 H 7228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbic q7, q7, q11 @final condn for p's 7238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmov.i8 q14, #2 
7248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbif q3, q12, q11 @final p0 7258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbit q1, q8, q7 @final p2 7268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbif q10, q2, q7 @final p1 7278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q12, d8, d4 @q0+p1 L 7288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmlal.u8 q12, d10, d28 @X2(q1) + q0 + p1 L 7298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q8, d9, d5 @q0+p1 H 7308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmlal.u8 q8, d11, d28 @X2(q1) + q0 + p1 H 7318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmov.i16 q14, #2 7328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q7, d4, d12 @p1+q2 L 7338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmla.u16 q7, q9, q14 @p1 + X2(p0) + X2(q0) + X2(q1) + q2L 7348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q2, d5, d13 @p1+q2H 7358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmla.u16 q2, q13, q14 @p1 + X2(p0) + X2(q0) + X2(q1) + q2H 7368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d24, q12, #2 @(X2(q1) + q0 + p1 + 2) >> 2; L q0' 7378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d25, q8, #2 @(X2(q1) + q0 + p1 + 2) >> 2; H q0' 7388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q9, q9, d12 @p0 + q0 + q1 + q2 L 7398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddw.u8 q13, q13, d13 @p0 + q0 + q1 + q2 H 7408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d16, q7, #3 @(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 L qo" 7418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vpop {q7} 7428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d17, q2, #3 @(p1 + X2(p0) + X2(q0) + X2(q1) + q2 + 4) >> 3 H qo" 7438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d4, q9, #2 @p0 + q0 + q1 + q2 + 2)>>2 L q1' 
7448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d5, q13, #2 @p0 + q0 + q1 + q2 + 2)>>2 H q1' 7458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbit q12, q8, q15 @q0' or q0" 7468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbic q15, q15, q11 @final condn for q's 7478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d0, d2 @row1 &2 7488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbit q5, q2, q15 @final q1 7498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d1, d3 @row9 &10 7508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q8, d12, d14 @q2+q3 L 7518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d20, d6 @row3&row4 7528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vaddl.u8 q2, d13, d15 @q2+q3 H 7538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d21, d7 @row11 & 12 7548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmla.u16 q9, q8, q14 @X2(q3) + X3(q2) + q1 + q0 + p0 L 7558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d2, d6 @row2 & row4 7568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vmla.u16 q13, q2, q14 @X2(q3) + X3(q2) + q1 + q0 + p0 H 7578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d3, d7 @row10 & 12 7588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbif q4, q12, q11 @final q0 7598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d0, d20 @row1 & 3 7608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d18, q9, #3 @(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; L 7618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d1, d21 @row9 & row11 7628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vrshrn.u16 d19, q13, #3 @(X2(q3) + X3(q2) + q1 + q0 + p0 + 4) >> 3; H 7638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d8, d10 @row5&6 7648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vbit q6, q9, q15 @final q2 7658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d9, 
d11 @row13 &14 7668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d12, d14 @row7 & 8 7678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.8 d13, d15 @row15 & 16 7688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d10, d14 @row6 & row8 7698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d11, d15 @row14 & row16 7708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S @now Q3 ->p0 and Q7->q3 7718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d8, d12 @row 5 & 7 7728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.16 d9, d13 @row13 & row15 7738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sub r0, r0, r1, lsl#4 @restore pointer 7748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d6, d14 @row4 & 8 7758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d7, d15 @row 12 & 16 7768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d0, d8 @row1 & row5 7778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d1, d9 @row9 & 13 7788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d2, d10 @row2 &6 7798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d3, d11 @row10&row14 7808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d20, d12 @row3 & 7 7818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vtrn.32 d21, d13 @row11 & row15 7828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d0, [r0], r1 @row1 7838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d2, [r0], r1 @row2 7848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d20, [r0], r1 @row3 7858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d6, [r0], r1 @row4 7868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d8, [r0], r1 @row5 7878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d10, [r0], r1 @row6 7888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d12, [r0], r1 @row7 
7898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d14, [r0], r1 @row8 7908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d1, [r0], r1 @row9 7918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d3, [r0], r1 @row10 7928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d21, [r0], r1 @row11 7938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d7, [r0], r1 @row12 7948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d9, [r0], r1 @row13 7958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d11, [r0], r1 @row14 7968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d13, [r0], r1 @row15 7978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vst1.8 d15, [r0], r1 @row16 7988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S vpop {d8 - d15} 7998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ldmfd sp!, {r12, pc} 8008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8037497191460a9504f8b4f64df169ab633f0b74353Harish Mahendrakar@** 8048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@******************************************************************************* 8058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 8068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @brief 8078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* Performs filtering of a luma block vertical edge when the 8088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* boundary strength is set to 4 on calling twice 8098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* 8108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* @par Description: 8118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* This operation is described in Sec. 8.7.2.4 under the title 8128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. 
@*
@* @param[in] r0 - pu1_src
@*  Pointer to the src sample q0
@*
@* @param[in] r1 - src_strd
@*  Source stride
@*
@* @param[in] r2 - alpha
@*  Alpha Value for the boundary
@*
@* @param[in] r3 - beta
@*  Beta Value for the boundary
@*
@* @returns
@*  None
@*
@* @remarks
@*  One call filters eight rows; each row is read and written as the eight
@*  bytes p3 p2 p1 p0 q0 q1 q2 q3 starting at pu1_src - 4.
@*  d8-d15 are saved on entry and restored before returning.
@*
@*******************************************************************************
@*

    .global ih264_deblk_luma_vert_bs4_mbaff_a9

ih264_deblk_luma_vert_bs4_mbaff_a9:

    stmfd       sp!, {lr}                       @ keep the return address on the stack

    sub         r0, r0, #4                      @ r0 -> p3 of the first row (4 bytes left of q0)
    vpush       {d8 - d15}                      @ d8-d15 are overwritten below

    @ Gather 8 rows. Each vld4.16 reads the byte pairs [p3 p2] [p1 p0] [q0 q1]
    @ [q2 q3] of one row into one 16-bit lane of four registers, then advances
    @ r0 by the stride.
    vld4.16     {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
    vld4.16     {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
    vld4.16     {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
    vld4.16     {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
    vld4.16     {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
    vld4.16     {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
    vld4.16     {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
    vld4.16     {d1[3], d3[3], d5[3], d7[3]}, [r0], r1

    @ Separate the byte pairs: afterwards every D register holds one pixel
    @ column, byte k belonging to row k.
    vuzp.8      d0, d1                          @ d0 = p3, d1 = p2
    vuzp.8      d2, d3                          @ d2 = p1, d3 = p0
    vuzp.8      d4, d5                          @ d4 = q0, d5 = q1
    vuzp.8      d6, d7                          @ d6 = q2, d7 = q3

    @ ---- p side sums and the two p0 candidates ------------------------------
    vmov.i16    q14, #2                         @ q14 = 2 in each 16-bit lane (vmla.u16 multiplier)
    vaddl.u8    q4, d3, d4                      @ q4 = p0 + q0
    vaddw.u8    q5, q4, d2                      @ q5 = p0 + q0 + p1
    vaddl.u8    q6, d1, d5                      @ q6 = p2 + q1
    vmla.u16    q6, q5, q14                     @ q6 = p2 + 2*p1 + 2*p0 + 2*q0 + q1

    vmov.i8     d14, #2                         @ d14 = 2 in each byte
    vaddw.u8    q4, q5, d1                      @ q4 = p2 + p1 + p0 + q0
    vdup.i8     d15, r2                         @ d15 = alpha
    vrshrn.u16  d10, q4, #2                     @ d10 = (p2 + p1 + p0 + q0 + 2) >> 2, p1 candidate
    vabd.u8     d11, d3, d4                     @ d11 = |p0 - q0|
    vsra.u8     d14, d15, #2                    @ d14 = (alpha >> 2) + 2
    vabd.u8     d15, d1, d3                     @ d15 = Ap = |p2 - p0|
    vrshrn.u16  d12, q6, #3                     @ d12 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4) >> 3
    vdup.i8     d13, r3                         @ d13 = beta
    vcgt.u8     d14, d14, d11                   @ d14 = mask: |p0 - q0| < (alpha >> 2) + 2
    vaddl.u8    q8, d3, d5                      @ q8 = p0 + q1
    vcgt.u8     d26, d13, d15                   @ d26 = mask: Ap < beta
    vaddw.u8    q8, q8, d2                      @ q8 = p0 + q1 + p1
    vaddw.u8    q8, q8, d2                      @ q8 = p0 + q1 + 2*p1
    vand        d26, d26, d14                   @ d26 = (Ap < beta) && (|p0 - q0| < (alpha >> 2) + 2)
    vrshrn.u16  d11, q8, #2                     @ d11 = (2*p1 + p0 + q1 + 2) >> 2
    vbif        d12, d11, d26                   @ d12 = d26 ? d12 : d11, chosen p0 candidate
    vaddl.u8    q9, d1, d0                      @ q9 = p2 + p3
    vadd.u16    q9, q9, q9                      @ q9 = 2*(p2 + p3)
    vadd.u16    q4, q4, q9                      @ q4 = 2*p3 + 3*p2 + p1 + p0 + q0

    @ ---- edge activity tests ------------------------------------------------
    vabd.u8     d15, d6, d4                     @ d15 = Aq = |q2 - q0|
    vabd.u8     d11, d5, d4                     @ d11 = |q1 - q0|
    vrshrn.u16  d8, q4, #3                      @ d8 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4) >> 3, p2 candidate
    vabd.u8     d9, d2, d3                      @ d9 = |p1 - p0|
    vcgt.u8     d15, d13, d15                   @ d15 = mask: Aq < beta
    vcge.u8     d11, d11, d13                   @ d11 = mask: |q1 - q0| >= beta
    vcge.u8     d9, d9, d13                     @ d9 = mask: |p1 - p0| >= beta
    vdup.i8     d13, r2                         @ d13 = alpha (beta is not read again)
    vand        d15, d15, d14                   @ d15 = (Aq < beta) && (|p0 - q0| < (alpha >> 2) + 2)
    vabd.u8     d14, d3, d4                     @ d14 = |p0 - q0|
    vorr        d11, d11, d9                    @ d11 = |p1 - p0| >= beta || |q1 - q0| >= beta
    vcge.u8     d14, d14, d13                   @ d14 = mask: |p0 - q0| >= alpha
    vaddl.u8    q10, d3, d4                     @ q10 = p0 + q0 (p0 not yet overwritten)
    vorr        d11, d11, d14                   @ d11 = mask of rows that must stay unfiltered
    vaddw.u8    q10, q10, d5                    @ q10 = p0 + q0 + q1
    vbic        d26, d26, d11                   @ d26 = final mask for p1 and p2
    vmov.i8     d14, #2                         @ d14 = 2 in each byte (vmlal.u8 multiplier)
    vbif        d3, d12, d11                    @ p0 = d11 ? p0 : d12
    vbit        d1, d8, d26                     @ p2 = d26 ? d8 : p2
    vbif        d10, d2, d26                    @ d10 = d26 ? d10 : p1, final p1 parked in d10

    @ ---- q side; d2 still carries the unfiltered p1 -------------------------
    vaddl.u8    q6, d4, d2                      @ q6 = q0 + p1
    vmlal.u8    q6, d5, d14                     @ q6 = 2*q1 + q0 + p1

    vaddl.u8    q11, d2, d6                     @ q11 = p1 + q2
    vmla.u16    q11, q10, q14                   @ q11 = p1 + 2*p0 + 2*q0 + 2*q1 + q2
    vrshrn.u16  d12, q6, #2                     @ d12 = (2*q1 + q0 + p1 + 2) >> 2
    vaddw.u8    q10, q10, d6                    @ q10 = p0 + q0 + q1 + q2
    vrshrn.u16  d8, q11, #3                     @ d8 = (p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4) >> 3

    vrshrn.u16  d2, q10, #2                     @ d2 = (p0 + q0 + q1 + q2 + 2) >> 2, q1 candidate
    vbit        d12, d8, d15                    @ d12 = d15 ? d8 : d12, chosen q0 candidate
    vbic        d15, d15, d11                   @ d15 = final mask for q1 and q2
    vbit        d5, d2, d15                     @ q1 = d15 ? d2 : q1
    vaddl.u8    q12, d6, d7                     @ q12 = q2 + q3
    vmla.u16    q10, q12, q14                   @ q10 = 2*q3 + 3*q2 + q1 + q0 + p0
    vbif        d4, d12, d11                    @ q0 = d11 ? q0 : d12
    vrshrn.u16  d9, q10, #3                     @ d9 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4) >> 3
    vbit        d6, d9, d15                     @ q2 = d15 ? d9 : q2
    vand        d2, d10, d10                    @ d2 = final p1; columns are d0..d7 = p3..q3 again

    @ Re-pair the columns so each 16-bit lane is one byte pair of one row.
    vzip.8      d0, d1                          @ d0, d1 = [p3 p2]
    vzip.8      d2, d3                          @ d2, d3 = [p1 p0]
    vzip.8      d4, d5                          @ d4, d5 = [q0 q1]
    vzip.8      d6, d7                          @ d6, d7 = [q2 q3]

    sub         r0, r0, r1, lsl #3              @ step back 8 rows to the first row

    @ Write the 8 rows back, mirroring the load sequence.
    vst4.16     {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
    vst4.16     {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
    vst4.16     {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
    vst4.16     {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
    vst4.16     {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
    vst4.16     {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
    vst4.16     {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
    vst4.16     {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
    vpop        {d8 - d15}                      @ restore d8-d15
    ldmfd       sp!, {pc}                       @ return



@**
@*******************************************************************************
@*
@* @brief
@*  Filters a luma block vertical edge (MBAFF variant) when the boundary
@*  strength is less than 4. One call covers eight rows, so the routine is
@*  called twice for a 16-row edge.
@*
@* @par Description:
@*  This operation is described in Sec. 8.7.2.3 under the title
@*  "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
@*
@* @param[in] r0 - pu1_src
@*  Pointer to the src sample q0
@*
@* @param[in] r1 - src_strd
@*  Source stride
@*
@* @param[in] r2 - alpha
@*  Alpha Value for the boundary
@*
@* @param[in] r3 - beta
@*  Beta Value for the boundary
@*
@* @param[in] sp(0) - u4_bs
@*  Packed Boundary strength array: four bytes, each applied to two
@*  consecutive rows, the most significant byte covering the first two rows
@*
@* @param[in] sp(4) - pu1_cliptab
@*  tc0_table; only its first four bytes are read and they are indexed by bS
@*
@* @returns
@*  None
@*
@* @remarks
@*  The stack offsets above are as seen on entry. d8-d15 are saved on entry
@*  and restored before returning.
@*
@*******************************************************************************
@*

    .global ih264_deblk_luma_vert_bslt4_mbaff_a9

ih264_deblk_luma_vert_bslt4_mbaff_a9:

    stmfd       sp!, {r12, lr}                  @ r12 and r14 are both reused as scratch

    sub         r0, r0, #4                      @ r0 -> p3 of the first row (4 bytes left of q0)
    ldr         r12, [sp, #8]                   @ r12 = u4_bs, first stack argument (8 = the two words just pushed)
    ldr         r14, [sp, #12]                  @ r14 = pu1_cliptab, second stack argument
    vpush       {d8 - d15}                      @ d8-d15 are overwritten below

    @ Gather 8 rows. Each vld4.16 reads the byte pairs [p3 p2] [p1 p0] [q0 q1]
    @ [q2 q3] of one row into one 16-bit lane of four registers, then advances
    @ r0 by the stride.
    vld4.16     {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
    vld4.16     {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
    vld4.16     {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
    vld4.16     {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
    vld4.16     {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
    vld4.16     {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
    vld4.16     {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
    vld4.16     {d1[3], d3[3], d5[3], d7[3]}, [r0], r1

    @ Separate the byte pairs: afterwards every D register holds one pixel
    @ column, byte k belonging to row k.
    vuzp.8      d0, d1                          @ d0 = p3, d1 = p2
    vuzp.8      d2, d3                          @ d2 = p1, d3 = p0
    vuzp.8      d4, d5                          @ d4 = q0, d5 = q1
    vuzp.8      d6, d7                          @ d6 = q2, d7 = q3

    @ ---- per-row clip value C0 = pu1_cliptab[bS] ----------------------------
    rev         r12, r12                        @ byte-reverse u4_bs so its top byte lands in lane 0
    vmov.32     d8[0], r12                      @ low word of d8 = reversed u4_bs
    vld1.32     d9[0], [r14]                    @ low word of d9 = first four table bytes
    vmovl.u8    q15, d8                         @ d30 = the four bS bytes, each widened to 16 bits
    vtbl.8      d8, {d9}, d30                   @ look every byte of d30 up in d9
    vsli.16     d8, d8, #8                      @ duplicate the low byte of each lane into its high byte: d8 = C0 per row

    @ ---- p1 delta and the filter-disable tests ------------------------------
    vrhadd.u8   d10, d3, d4                     @ d10 = (p0 + q0 + 1) >> 1
    vmov.i8     d31, #2                         @ d31 = 2 in each byte (vmlsl.u8 multiplier)
    vabd.u8     d11, d3, d4                     @ d11 = |p0 - q0|
    vaddl.u8    q6, d10, d1                     @ q6 = p2 + ((p0 + q0 + 1) >> 1)
    vmlsl.u8    q6, d2, d31                     @ q6 = p2 + ((p0 + q0 + 1) >> 1) - 2*p1
    vdup.8      d14, r2                         @ d14 = alpha
    vcle.u8     d11, d14, d11                   @ d11 = mask: alpha <= |p0 - q0|
    vdup.i8     d14, r3                         @ d14 = beta
    vabd.u8     d15, d5, d4                     @ d15 = |q1 - q0|
    vqshrn.s16  d12, q6, #1                     @ d12 = q6 >> 1, saturated to signed 8 bits
    vcge.u8     d15, d15, d14                   @ d15 = mask: |q1 - q0| >= beta
    vabd.u8     d13, d2, d3                     @ d13 = |p1 - p0|
    vmin.s8     d12, d12, d8                    @ d12 = min(d12, C0)
    vorr        d11, d11, d15                   @ d11 = |p0 - q0| >= alpha || |q1 - q0| >= beta
    vneg.s8     d15, d8                         @ d15 = -C0
    vcge.u8     d13, d13, d14                   @ d13 = mask: |p1 - p0| >= beta
    vmax.s8     d12, d12, d15                   @ d12 = p1 delta clipped to [-C0, C0]
    vorr        d11, d11, d13                   @ d11 = any of the three activity tests failed
    vceq.u16    d13, d30, #0                    @ d13 = mask: bS == 0 (covers both rows of the pair)
    vaddl.u8    q14, d10, d6                    @ q14 = q2 + ((p0 + q0 + 1) >> 1)
    vsubw.u8    q14, q14, d5                    @ q14 -= q1
    vsubw.u8    q14, q14, d5                    @ q14 = q2 + ((p0 + q0 + 1) >> 1) - 2*q1
    vorr        d13, d13, d11                   @ d13 = mask of rows that must stay unfiltered

    @ ---- q1 delta, Ap/Aq tests and the p0/q0 delta --------------------------
    vqshrn.s16  d9, q14, #1                     @ d9 = q14 >> 1, saturated to signed 8 bits
    vabd.u8     d11, d1, d3                     @ d11 = Ap = |p2 - p0|
    vabd.u8     d10, d6, d4                     @ d10 = Aq = |q2 - q0|
    vclt.u8     d11, d11, d14                   @ d11 = mask: Ap < beta
    vmin.s8     d9, d9, d8                      @ d9 = min(d9, C0)
    vclt.u8     d10, d10, d14                   @ d10 = mask: Aq < beta
    vmax.s8     d9, d9, d15                     @ d9 = q1 delta clipped to [-C0, C0]
    vsubl.u8    q7, d4, d3                      @ q7 = q0 - p0 (beta and -C0 in d14, d15 are dead from here)
    vshl.s16    q7, q7, #2                      @ q7 = (q0 - p0) << 2
    vsub.u8     d8, d8, d11                     @ d8 = C0 + (Ap < beta); subtracting an all-ones byte adds 1
    vaddw.u8    q7, q7, d2                      @ q7 = ((q0 - p0) << 2) + p1
    vsubw.u8    q7, q7, d5                      @ q7 = ((q0 - p0) << 2) + (p1 - q1)
    vbic        d11, d11, d13                   @ d11 = final mask for p1
    vrshr.s16   q15, q7, #3                     @ q15 = delta = (q7 + 4) >> 3
    vsub.u8     d8, d8, d10                     @ d8 = C = C0 + (Ap < beta) + (Aq < beta)
    vbic        d10, d10, d13                   @ d10 = final mask for q1
    vabs.s16    q14, q15                        @ q14 = |delta|
    vmovn.i16   d15, q14                        @ d15 = |delta| narrowed to bytes
    vand        d12, d12, d11                   @ zero the p1 delta where p1 is not filtered
    vand        d9, d9, d10                     @ zero the q1 delta where q1 is not filtered
    vmin.u8     d15, d15, d8                    @ d15 = min(|delta|, C)
    vadd.i8     d2, d2, d12                     @ p1 += p1 delta
    vadd.i8     d5, d5, d9                      @ q1 += q1 delta
    vbic        d15, d15, d13                   @ force |delta| to 0 on unfiltered rows
    vcge.s16    q14, q15, #0                    @ q14 = mask: delta >= 0
    vmovn.i16   d14, q14                        @ d14 = the same mask narrowed to bytes
    vqsub.u8    d11, d3, d15                    @ d11 = saturating p0 - |delta|
    vqadd.u8    d3, d3, d15                     @ d3 = saturating p0 + |delta|
    vqadd.u8    d12, d4, d15                    @ d12 = saturating q0 + |delta|
    vqsub.u8    d4, d4, d15                     @ d4 = saturating q0 - |delta|
    vbif        d3, d11, d14                    @ p0 = (delta >= 0) ? p0 + |delta| : p0 - |delta|
    vbif        d4, d12, d14                    @ q0 = (delta >= 0) ? q0 - |delta| : q0 + |delta|

    sub         r0, r0, r1, lsl #3              @ step back 8 rows to the first row

    @ Columns are d0..d7 = p3..q3; re-pair them so each 16-bit lane is one
    @ byte pair of one row.
    vzip.8      d0, d1                          @ d0, d1 = [p3 p2]
    vzip.8      d2, d3                          @ d2, d3 = [p1 p0]
    vzip.8      d4, d5                          @ d4, d5 = [q0 q1]
    vzip.8      d6, d7                          @ d6, d7 = [q2 q3]

    @ Write the 8 rows back, mirroring the load sequence.
    vst4.16     {d0[0], d2[0], d4[0], d6[0]}, [r0], r1
    vst4.16     {d0[1], d2[1], d4[1], d6[1]}, [r0], r1
    vst4.16     {d0[2], d2[2], d4[2], d6[2]}, [r0], r1
    vst4.16     {d0[3], d2[3], d4[3], d6[3]}, [r0], r1
    vst4.16     {d1[0], d3[0], d5[0], d7[0]}, [r0], r1
    vst4.16     {d1[1], d3[1], d5[1], d7[1]}, [r0], r1
    vst4.16     {d1[2], d3[2], d5[2], d7[2]}, [r0], r1
    vst4.16     {d1[3], d3[3], d5[3], d7[3]}, [r0], r1
    vpop        {d8 - d15}                      @ restore d8-d15
    ldmfd       sp!, {r12, pc}                  @ restore r12 and return