//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///*****************************************************************************/
///*                                                                           */
///*  File Name         : ih264_deblk_chroma_av8.s                             */
///*                                                                           */
///*  Description       : Contains function definitions for deblocking chroma  */
///*                      edge. Functions are coded in NEON assembly and can   */
///*                      be compiled using ARM RVDS.                          */
///*                                                                           */
///*  List of Functions : ih264_deblk_chroma_vert_bs4_av8()                    */
///*                      ih264_deblk_chroma_vert_bslt4_av8()                  */
///*                      ih264_deblk_chroma_horz_bs4_av8()                    */
///*                      ih264_deblk_chroma_horz_bslt4_av8()                  */
///*  Issues / Problems : None                                                 */
///*                                                                           */
///*  Revision History  :                                                      */
///*                                                                           */
///*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
///*         28 11 2013   Ittiam          Draft                                */
///*****************************************************************************/


.text
.p2align 2
.include "ih264_neon_macros.s"

///**
//*******************************************************************************
//*
//* @brief
//*     Performs filtering of a chroma block horizontal edge when the
//*     boundary strength is set to 4 in high profile
//*
//* @par Description:
//*     This operation is described in Sec. 8.7.2.4 under the title
//*     "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
//*
//*     Four rows of 16 bytes are read (p1, p0, q0, q1); each row is
//*     de-interleaved by ld2 into 8 U and 8 V samples. The p0 and q0 rows are
//*     rewritten with
//*         p0' = (2*p1 + p0 + q1 + 2) >> 2
//*         q0' = (2*q1 + q0 + p1 + 2) >> 2
//*     except at samples where |p0-q0| >= alpha, |q1-q0| >= beta or
//*     |p1-p0| >= beta, which keep their original value.
//*
//* @param[in] x0 - pu1_src
//*     Pointer to the src sample q0
//*
//* @param[in] w1 - src_strd
//*     Source stride
//*
//* @param[in] w2 - alpha_cb
//*     Alpha Value for the boundary in U
//*
//* @param[in] w3 - beta_cb
//*     Beta Value for the boundary in U
//*
//* @param[in] w4 - alpha_cr
//*     Alpha Value for the boundary in V
//*
//* @param[in] w5 - beta_cr
//*     Beta Value for the boundary in V
//*
//* @returns
//*     None
//*
//* @remarks
//*     Writes x0, x1, x6 and v0-v11, v14, v16-v18, v20-v22, v24, v26, v28, v31.
//*     push_v_regs / pop_v_regs come from ih264_neon_macros.s; presumably they
//*     save and restore d8-d15 - confirm against the macro file.
//*
//*******************************************************************************
//*/

    .global ih264_deblk_chroma_horz_bs4_av8

ih264_deblk_chroma_horz_bs4_av8:

    push_v_regs
    sxtw      x1, w1                        // stride is passed as a 32-bit value
    sub       x0, x0, x1, lsl #1            // x0 -> p1 row (two rows above q0)
    ld2       {v6.8b, v7.8b}, [x0], x1      // v6 = p1 U, v7 = p1 V
    mov       x6, x0                        // x6 -> p0 row, kept for the stores
    ld2       {v4.8b, v5.8b}, [x0], x1      // v4 = p0 U, v5 = p0 V
    dup       v20.8b, w2                    // alpha_cb in every byte
    dup       v21.8b, w4                    // alpha_cr in every byte
    mov       v20.d[1], v21.d[0]            // v20 = {alpha_cb x8 | alpha_cr x8}
    ld2       {v0.8b, v1.8b}, [x0], x1      // v0 = q0 U, v1 = q0 V
    uaddl     v8.8h, v6.8b, v0.8b           //
    uaddl     v10.8h, v7.8b, v1.8b          // v8, v10 = p1 + q0 (U, V)
    movi      v31.8b, #2                    // multiplier for the 2*x terms
    ld2       {v2.8b, v3.8b}, [x0]          // v2 = q1 U, v3 = q1 V
    mov       v0.d[1], v1.d[0]              // v0 = {q0 U | q0 V}
    mov       v2.d[1], v3.d[0]              // v2 = {q1 U | q1 V}
    mov       v4.d[1], v5.d[0]              // v4 = {p0 U | p0 V}
    mov       v6.d[1], v7.d[0]              // v6 = {p1 U | p1 V}
    uabd      v26.16b, v6.16b, v4.16b       // v26 = |p1 - p0|
    umlal     v8.8h, v2.8b, v31.8b          //
    umlal     v10.8h, v3.8b, v31.8b         // v8, v10 = 2*q1 + q0 + p1
    uabd      v22.16b, v4.16b, v0.16b       // v22 = |p0 - q0|
    uabd      v24.16b, v2.16b, v0.16b       // v24 = |q1 - q0|
    uaddl     v14.8h, v4.8b, v2.8b          //
    uaddl     v28.8h, v5.8b, v3.8b          // v14, v28 = p0 + q1 (U, V)
    dup       v16.8b, w3                    // beta_cb in every byte
    dup       v17.8b, w5                    // beta_cr in every byte
    mov       v16.d[1], v17.d[0]            // v16 = {beta_cb x8 | beta_cr x8}
    umlal     v14.8h, v6.8b, v31.8b         //
    umlal     v28.8h, v7.8b, v31.8b         // v14, v28 = 2*p1 + p0 + q1
    cmhs      v18.16b, v22.16b, v20.16b     // |p0 - q0| >= alpha
    cmhs      v24.16b, v24.16b, v16.16b     // |q1 - q0| >= beta
    cmhs      v26.16b, v26.16b, v16.16b     // |p1 - p0| >= beta
    rshrn     v8.8b, v8.8h, #2              //
    rshrn     v9.8b, v10.8h, #2             // (2*q1 + q0 + p1 + 2) >> 2
    mov       v8.d[1], v9.d[0]              // v8 = filtered q0 {U | V}
    orr       v18.16b, v18.16b, v24.16b     //
    rshrn     v10.8b, v14.8h, #2            //
    rshrn     v11.8b, v28.8h, #2            // (2*p1 + p0 + q1 + 2) >> 2
    mov       v10.d[1], v11.d[0]            // v10 = filtered p0 {U | V}
    orr       v18.16b, v18.16b, v26.16b     // v18 = "leave this sample unfiltered" mask
    bit       v10.16b, v4.16b, v18.16b      // put original p0 back where mask is set
    bit       v8.16b, v0.16b, v18.16b       // put original q0 back where mask is set
    mov       v11.d[0], v10.d[1]            // split V half out again for st2
    mov       v9.d[0], v8.d[1]              //
    st2       {v10.8b, v11.8b}, [x6], x1    // write p0 row, U/V re-interleaved
    st2       {v8.8b, v9.8b}, [x6]          // write q0 row
    pop_v_regs
    ret



///**
//*******************************************************************************
//*
//* @brief
//*     Performs filtering of a chroma block vertical edge when the
//*     boundary strength is set to 4 in high profile
//*
//* @par Description:
//*     This operation is described in Sec. 8.7.2.4 under the title
//*     "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
//*
//*     Eight rows are processed. From each row four 16-bit (U,V) pairs are
//*     read starting at pu1_src - 4, i.e. p1, p0, q0, q1. The pairs are
//*     regrouped so one register holds the same tap for all eight rows, then
//*         p0' = (2*p1 + p0 + q1 + 2) >> 2
//*         q0' = (2*q1 + q0 + p1 + 2) >> 2
//*     is applied where |p0-q0| < alpha, |q1-q0| < beta and |p1-p0| < beta.
//*     All four taps are stored back; p1 and q1 are written unchanged.
//*
//* @param[in] x0 - pu1_src
//*     Pointer to the src sample q0
//*
//* @param[in] w1 - src_strd
//*     Source stride
//*
//* @param[in] w2 - alpha_cb
//*     Alpha Value for the boundary in U
//*
//* @param[in] w3 - beta_cb
//*     Beta Value for the boundary in U
//*
//* @param[in] w4 - alpha_cr
//*     Alpha Value for the boundary in V
//*
//* @param[in] w5 - beta_cr
//*     Beta Value for the boundary in V
//*
//* @returns
//*     None
//*
//* @remarks
//*     Writes x0-x3, x12 and v0-v8, v10, v12, v14-v16, v18-v20, v22, v24, v31.
//*     push_v_regs / pop_v_regs come from ih264_neon_macros.s; presumably they
//*     save and restore d8-d15 - confirm against the macro file.
//*
//*******************************************************************************
//*/

    .global ih264_deblk_chroma_vert_bs4_av8

ih264_deblk_chroma_vert_bs4_av8:

    push_v_regs
    sxtw      x1, w1                        // stride is passed as a 32-bit value

    sub       x0, x0, #4                    // x0 -> p1 (U,V) pair of row 0
    mov       x12, x0                       // same address, kept for the write-back

    add       w2, w2, w4, lsl #8            // w2 = alpha_cb | alpha_cr << 8
    add       w3, w3, w5, lsl #8            // w3 = beta_cb  | beta_cr  << 8

    // Rows 0-3: lane i of v0/v1/v2/v3 = p1/p0/q0/q1 pair of row i
    ld4       {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[2], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[3], [x0], x1

    // Rows 4-7: lane i of v4/v5/v6/v7 = p1/p0/q0/q1 pair of row 4+i
    ld4       {v4.h, v5.h, v6.h, v7.h}[0], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[1], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[2], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[3], [x0], x1

    // Regroup so that (v0,v1)=p1, (v2,v3)=p0, (v4,v5)=q0, (v6,v7)=q1,
    // each pair being rows 0-3 followed by rows 4-7. v10 is the scratch.
    mov       v10.16b, v2.16b
    mov       v2.16b, v1.16b
    mov       v1.16b, v4.16b
    mov       v4.16b, v10.16b
    mov       v10.16b, v6.16b
    mov       v6.16b, v3.16b
    mov       v3.16b, v5.16b
    mov       v5.16b, v10.16b

    dup       v22.8h, w2                    // alpha: bytes alternate cb, cr like the samples
    dup       v24.8h, w3                    // beta:  bytes alternate cb, cr
    movi      v31.8b, #2                    // multiplier for the 2*x terms

    mov       v0.d[1], v1.d[0]              // v0 = p1, all 8 rows
    mov       v2.d[1], v3.d[0]              // v2 = p0, all 8 rows
    mov       v4.d[1], v5.d[0]              // v4 = q0, all 8 rows
    mov       v6.d[1], v7.d[0]              // v6 = q1, all 8 rows

    uabd      v8.16b, v2.16b, v4.16b        // |p0-q0|
    uabd      v10.16b, v6.16b, v4.16b       // |q1-q0|
    uabd      v12.16b, v0.16b, v2.16b       // |p1-p0|
    uaddl     v14.8h, v2.8b, v6.8b
    uaddl     v16.8h, v3.8b, v7.8b          // (p0 + q1)
    cmhi      v8.16b, v22.16b, v8.16b       // |p0-q0| < alpha ?
    cmhi      v10.16b, v24.16b, v10.16b     // |q1-q0| < beta ?
    cmhi      v12.16b, v24.16b, v12.16b     // |p1-p0| < beta ?
    umlal     v14.8h, v0.8b, v31.8b
    umlal     v16.8h, v1.8b, v31.8b         // 2*p1 + (p0 + q1)
    uaddl     v18.8h, v0.8b, v4.8b
    uaddl     v20.8h, v1.8b, v5.8b          // (p1 + q0)
    and       v8.16b, v8.16b, v10.16b       // |p0-q0| < alpha && |q1-q0| < beta
    umlal     v18.8h, v6.8b, v31.8b
    umlal     v20.8h, v7.8b, v31.8b         // 2*q1 + (p1 + q0)

    rshrn     v14.8b, v14.8h, #2
    rshrn     v15.8b, v16.8h, #2            // (2*p1 + (p0 + q1) + 2) >> 2
    mov       v14.d[1], v15.d[0]            // v14 = filtered p0, all 8 rows
    and       v8.16b, v8.16b, v12.16b       // ... && |p1-p0| < beta  -> filter mask
    rshrn     v18.8b, v18.8h, #2
    rshrn     v19.8b, v20.8h, #2            // (2*q1 + (p1 + q0) + 2) >> 2
    mov       v18.d[1], v19.d[0]            // v18 = filtered q0, all 8 rows
    bit       v2.16b, v14.16b, v8.16b       // take filtered p0 where mask is set
    bit       v4.16b, v18.16b, v8.16b       // take filtered q0 where mask is set

    // Split each tap back into rows 0-3 / rows 4-7 halves ...
    mov       v1.d[0], v0.d[1]
    mov       v3.d[0], v2.d[1]
    mov       v5.d[0], v4.d[1]
    mov       v7.d[0], v6.d[1]

    // ... and undo the regrouping so v0-v3 / v4-v7 match the st4 lane layout.
    mov       v10.16b, v1.16b
    mov       v1.16b, v2.16b
    mov       v2.16b, v4.16b
    mov       v4.16b, v10.16b
    mov       v10.16b, v3.16b
    mov       v3.16b, v6.16b
    mov       v6.16b, v5.16b
    mov       v5.16b, v10.16b

    st4       {v0.h, v1.h, v2.h, v3.h}[0], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[1], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[2], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[3], [x12], x1

    st4       {v4.h, v5.h, v6.h, v7.h}[0], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[1], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[2], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[3], [x12], x1

    pop_v_regs
    ret


2828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 2838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S///** 2848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//******************************************************************************* 2858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 2868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @brief 2878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Performs filtering of a chroma block horizontal edge for cases where the 2888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* boundary strength is less than 4 in high profile 2898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 2908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @par Description: 2918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* This operation is described in Sec. 8.7.2.4 under the title 2928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* "Filtering process for edges for bS equal to 4" in ITU T Rec H.264. 2938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 2948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @param[in] x0 - pu1_src 2958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Pointer to the src sample q0 2968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 297d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w1 - src_strd 2988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Source stride 2998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 300d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w2 - alpha_cb 3018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Alpha Value for the boundary in U 3028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 303d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w3 - beta_cb 3048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Beta Value for the boundary in U 3058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 
306d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w4 - alpha_cr 3078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Alpha Value for the boundary in V 3088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 309d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w5 - beta_cr 3108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Beta Value for the boundary in V 3118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 312d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w6 - u4_bs 3138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Packed Boundary strength array 3148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 315d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] x7 - pu1_cliptab_cb 3168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* tc0_table for U 3178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 318d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] sp(0) - pu1_cliptab_cr 3198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* tc0_table for V 3208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 3218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @returns 3228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* None 3238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 3248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @remarks 3258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* None 3268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 3278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//******************************************************************************* 3288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*/ 3298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S .global ih264_deblk_chroma_horz_bslt4_av8 3318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
3328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Sih264_deblk_chroma_horz_bslt4_av8: 3338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // STMFD sp!,{x4-x9,x14} // 3358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S push_v_regs 3368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S stp x19, x20, [sp, #-16]! 337d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo sxtw x1, w1 338f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo ldr x8, [sp, #80] 3398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sub x0, x0, x1, lsl #1 //x0 = uc_edgePixelU pointing to p1 of chroma U 340f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo rev w6, w6 // 341f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo mov v12.s[0], w6 //D12[0] = ui_Bs 342f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo ld1 {v16.s}[0], [x7] //D16[0] contains cliptab_cb 343f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo ld1 {v17.s}[0], [x8] //D17[0] contains cliptab_cr 3448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ld2 {v6.8b, v7.8b}, [x0], x1 //Q3=p1 3458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S tbl v14.8b, {v16.16b}, v12.8b //Retreiving cliptab values for U 3468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S tbl v28.8b, {v17.16b}, v12.8b //Retrieving cliptab values for V 3478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uxtl v12.8h, v12.8b //Q6 = uc_Bs in each 16 bit scalar 3488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov x6, x0 //Keeping a backup of the pointer to chroma U P0 3498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ld2 {v4.8b, v5.8b}, [x0], x1 //Q2=p0 3508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S movi v30.8b, #1 // 3518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dup v20.8b, w2 //D20 contains alpha_cb 3528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dup v21.8b, w4 //D21 contains alpha_cr 
3538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v20.d[1], v21.d[0] 3548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ld2 {v0.8b, v1.8b}, [x0], x1 //Q0=q0 3558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uxtl v14.8h, v14.8b // 3568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uxtl v28.8h, v28.8b // 3578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v15.d[0], v28.d[0] //D14 has cliptab values for U, D15 for V 3588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v14.d[1], v28.d[0] 3598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ld2 {v2.8b, v3.8b}, [x0] //Q1=q1 3608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S usubl v10.8h, v1.8b, v5.8b // 3618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S usubl v8.8h, v0.8b, v4.8b //Q5,Q4 = (q0 - p0) 3628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v6.d[1], v7.d[0] 3638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v4.d[1], v5.d[0] 3648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uabd v26.16b, v6.16b , v4.16b //Q13 = ABS(p1 - p0) 3658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S shl v10.8h, v10.8h, #2 //Q5 = (q0 - p0)<<2 3668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v0.d[1], v1.d[0] 3678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uabd v22.16b, v4.16b , v0.16b //Q11 = ABS(p0 - q0) 3688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S shl v8.8h, v8.8h, #2 //Q4 = (q0 - p0)<<2 3698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v14.d[1], v15.d[0] 3708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sli v14.8h, v14.8h, #8 3718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v15.d[0], v14.d[1] 3728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v2.d[1], v3.d[0] 3738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uabd v24.16b, v2.16b , v0.16b //Q12 = ABS(q1 - q0) 3748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S cmhs v18.16b, v22.16b, v20.16b 
3758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S usubl v20.8h, v6.8b, v2.8b //Q10 = (p1 - q1)L 3768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S usubl v6.8h, v7.8b, v3.8b //Q3 = (p1 - q1)H 3778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dup v16.8b, w3 //Q8 contains beta_cb 3788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dup v17.8b, w5 //Q8 contains beta_cr 3798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v16.d[1], v17.d[0] 3808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S add v8.8h, v8.8h , v20.8h // 3818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S add v10.8h, v10.8h , v6.8h //Q5,Q4 = [ (q0 - p0)<<2 ] + (p1 - q1) 3828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S cmhs v24.16b, v24.16b, v16.16b 3838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S cmgt v12.4h, v12.4h, #0 3848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sqrshrn v8.8b, v8.8h, #3 // 3858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sqrshrn v9.8b, v10.8h, #3 //Q4 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3 3868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v8.d[1], v9.d[0] 3878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S add v14.8b, v14.8b , v30.8b //D14 = C = C0+1 for U 3888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S cmhs v26.16b, v26.16b, v16.16b 3898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S orr v18.16b, v18.16b , v24.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) 3908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S abs v6.16b, v8.16b //Q4 = ABS (i_macro) 3918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S add v15.8b, v15.8b , v30.8b //D15 = C = C0+1 for V 3928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v14.d[1], v15.d[0] 3938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v13.8b, v12.8b 3948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v12.d[1], v13.d[0] // 3958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S orr v18.16b, v18.16b , 
v26.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta ) 3968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S umin v14.16b, v6.16b , v14.16b //Q7 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro) 3978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S bic v12.16b, v12.16b , v18.16b //final condition 3988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S cmge v8.16b, v8.16b, #0 3998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S and v14.16b, v14.16b , v12.16b //Making delta zero in places where values shouldn be filterd 4008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uqadd v16.16b, v4.16b , v14.16b //Q8 = p0 + delta 4018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uqsub v4.16b, v4.16b , v14.16b //Q2 = p0 - delta 4028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uqadd v18.16b, v0.16b , v14.16b //Q9 = q0 + delta 4038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S uqsub v0.16b, v0.16b , v14.16b //Q0 = q0 - delta 4048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S bif v16.16b, v4.16b , v8.16b //Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta) 4058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S bif v0.16b, v18.16b , v8.16b //Q0 = (i_macro >= 0 ) ? 
(q0-delta) : (q0+delta) 4068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v17.d[0], v16.d[1] 4078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mov v1.d[0], v0.d[1] 4088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S st2 {v16.8b, v17.8b}, [x6], x1 // 4098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S st2 {v0.8b, v1.8b}, [x6] // 4108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ldp x19, x20, [sp], #16 4128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pop_v_regs 4138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ret 4148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S///** 4198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//******************************************************************************* 4208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 4218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @brief 4228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Performs filtering of a chroma block vertical edge for cases where the 4238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* boundary strength is less than 4 in high profile 4248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* 4258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @par Description: 4268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* This operation is described in Sec. 8.7.2.3 under the title 4278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
//*
//* @param[in] x0 - pu1_src
//*  Pointer to the src sample q0
//*
//* @param[in] w1 - src_strd
//*  Source stride (sign-extended to 64 bits on entry)
//*
//* @param[in] w2 - alpha_cb
//*  Alpha Value for the boundary in U
//*
//* @param[in] w3 - beta_cb
//*  Beta Value for the boundary in U
//*
//* @param[in] w4 - alpha_cr
//*  Alpha Value for the boundary in V
//*
//* @param[in] w5 - beta_cr
//*  Beta Value for the boundary in V
//*
//* @param[in] w6 - u4_bs
//*  Packed Boundary strength array (one byte per strength value; the word
//*  is byte-reversed before the individual bytes are used)
//*
//* @param[in] x7 - pu1_cliptab_cb
//*  tc0_table for U (the first 4 bytes are read)
//*
//* @param[in] sp(0) - pu1_cliptab_cr
//*  tc0_table for V (the first 4 bytes are read)
//*
//* @returns
//*  None
//*
//* @remarks
//*  Eight rows of four 16-bit samples (p1, p0, q0, q1) are read starting
//*  4 bytes before pu1_src and written back in place; only p0 and q0 are
//*  modified.
//*  This is the bS < 4 filter (ITU-T Rec. H.264 Sec. 8.7.2.3, "Filtering
//*  process for edges with bS less than 4").
//*  Scratch registers: x10-x12, v0-v8, v10, v12-v16, v18-v20, v22,
//*  v24-v26, v28, v31.
//*
//*******************************************************************************
//*/

    .global ih264_deblk_chroma_vert_bslt4_av8

ih264_deblk_chroma_vert_bslt4_av8:

    //---- prologue ----------------------------------------------------------
    // push_v_regs / pop_v_regs are macros defined outside this chunk;
    // presumably they spill and reload the callee-saved d8-d15 (confirm).
    push_v_regs
    // NOTE(review): x19/x20 are never referenced in this routine; the spill
    // is kept because the stack-argument offset below accounts for it.
    stp       x19, x20, [sp, #-16]!
    sxtw      x1, w1                        //x1 = (int64) src_strd
    mov       x10, x7                       //x10 = pu1_cliptab_cb
    ldr       x11, [sp, #80]                //x11 = pu1_cliptab_cr (stack argument, above the saved registers)
    sub       x0, x0, #4                    //point x0 to p1u of row0.
    add       w2, w2, w4, lsl #8            //w2 = alpha_cb | (alpha_cr << 8)
    add       w3, w3, w5, lsl #8            //w3 = beta_cb  | (beta_cr  << 8)
    mov       x12, x0                       //keep a back up of x0 for buffer write

    //---- load 8 rows, de-interleaving p1/p0/q0/q1 --------------------------
    // Each ld4 takes four halfwords of one row: lane n of v0..v3 (rows 0-3)
    // or of v4..v7 (rows 4-7) receives p1, p0, q0, q1 of that row.
    ld4       {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[2], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[3], [x0], x1

    ld4       {v4.h, v5.h, v6.h, v7.h}[0], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[1], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[2], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[3], [x0], x1

    // Regroup so that each sample column occupies an even/odd register pair:
    //   v0/v1 = p1 (rows 0-3 / 4-7), v2/v3 = p0, v4/v5 = q0, v6/v7 = q1.
    // v10 is the temporary for the swaps.
    mov       v10.16b, v2.16b
    mov       v2.16b, v1.16b
    mov       v1.16b, v4.16b
    mov       v4.16b, v10.16b
    mov       v10.16b, v6.16b
    mov       v6.16b, v3.16b
    mov       v3.16b, v5.16b
    mov       v5.16b, v10.16b

    //---- filter-enable mask ------------------------------------------------
    dup       v22.8h, w2                    //v22 = alpha, cb/cr bytes alternating
    mov       v2.d[1], v3.d[0]              //v2 = p0, all 8 rows
    mov       v4.d[1], v5.d[0]              //v4 = q0, all 8 rows
    uabd      v8.16b, v2.16b, v4.16b        //|p0-q0|
    dup       v24.8h, w3                    //v24 = beta, cb/cr bytes alternating
    // NOTE(review): v25 bytes 0-3 are overwritten by the clip-table load
    // below and only those bytes are selected when every bS is < 4, so this
    // copy looks redundant; kept to leave the instruction stream unchanged.
    mov       v25.d[0], v24.d[1]
    mov       v6.d[1], v7.d[0]              //v6 = q1, all 8 rows
    mov       v0.d[1], v1.d[0]              //v0 = p1, all 8 rows
    uabd      v10.16b, v6.16b, v4.16b       //|q1-q0|
    uabd      v12.16b, v0.16b, v2.16b       //|p1-p0|
    cmhi      v8.16b, v22.16b, v8.16b       //|p0-q0| < alpha ?
    usubl     v14.8h, v0.8b, v6.8b          //(p1 - q1), rows 0-3
    cmhi      v10.16b, v24.16b, v10.16b     //|q1-q0| < beta ?
    usubl     v16.8h, v1.8b, v7.8b          //(p1 - q1), rows 4-7
    cmhi      v12.16b, v24.16b, v12.16b     //|p1-p0| < beta ?
    usubl     v18.8h, v4.8b, v2.8b          //(q0 - p0), rows 0-3
    and       v8.16b, v8.16b, v10.16b       //|p0-q0| < alpha && |q1-q0| < beta
    usubl     v20.8h, v5.8b, v3.8b          //(q0 - p0), rows 4-7
    movi      v28.8h, #4

    //---- clip values (tC0) and boundary strengths --------------------------
    // beta in v24 is no longer needed from here on; byte lanes 0-3 of
    // v24/v25 become the 4-entry lookup tables for tbl.
    ld1       {v24.s}[0], [x10]             //Load ClipTable for U
    ld1       {v25.s}[0], [x11]             //Load ClipTable for V
    rev       w6, w6                        //byte-reverse the packed strengths
    and       v8.16b, v8.16b, v12.16b       //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
    mov       v10.s[0], w6
    mla       v14.8h, v18.8h, v28.8h
    mla       v16.8h, v20.8h, v28.8h        //4*(q0 - p0) + (p1 - q1)
    uxtl      v10.8h, v10.8b
    sli       v10.4h, v10.4h, #8            //each strength byte duplicated: b0,b0,b1,b1,b2,b2,b3,b3
    tbl       v12.8b, {v24.16b}, v10.8b     //tC0 for U
    tbl       v13.8b, {v25.16b}, v10.8b     //tC0 for V
    zip1      v31.8b, v12.8b, v13.8b        //interleave U/V clip values, first half
    zip2      v13.8b, v12.8b, v13.8b        //interleave U/V clip values, second half
    mov       v12.8b, v31.8b
    mov       v12.d[1], v13.d[0]            //v12 = tC0 for every sample byte
    uxtl      v10.4s, v10.4h
    sli       v10.4s, v10.4s, #16           //each strength now fills 4 bytes (2 rows of U,V)
    movi      v24.16b, #1
    add       v12.16b, v12.16b, v24.16b     //tC = tC0 + 1
    cmhs      v10.16b, v10.16b, v24.16b     //bS >= 1 ?
    and       v8.16b, v8.16b, v10.16b       //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0

    // At this point:
    //   v0, v2, v4, v6 = p1, p0, q0, q1 (inputs)
    //   v8             = per-byte filter-enable mask
    //   v12            = tC

    //---- delta = clip(-tC, tC, ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3) ---------
    srshr     v14.8h, v14.8h, #3
    srshr     v16.8h, v16.8h, #3            //(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3
    cmgt      v18.8h, v14.8h, #0
    cmgt      v20.8h, v16.8h, #0
    xtn       v18.8b, v18.8h
    xtn       v19.8b, v20.8h
    mov       v18.d[1], v19.d[0]            //v18 = all-ones where delta > 0
    abs       v14.8h, v14.8h
    abs       v16.8h, v16.8h
    xtn       v14.8b, v14.8h
    xtn       v15.8b, v16.8h
    mov       v14.d[1], v15.d[0]
    umin      v14.16b, v14.16b, v12.16b     //v14 = min(|delta|, tC)

    //---- apply delta with unsigned saturation --------------------------------
    uqadd     v20.16b, v2.16b, v14.16b      //p0+|delta|
    uqadd     v22.16b, v4.16b, v14.16b      //q0+|delta|
    uqsub     v24.16b, v2.16b, v14.16b      //p0-|delta|
    uqsub     v26.16b, v4.16b, v14.16b      //q0-|delta|
    bit       v24.16b, v20.16b, v18.16b     //p0 + delta
    bit       v22.16b, v26.16b, v18.16b     //q0 - delta
    bit       v2.16b, v24.16b, v8.16b       //replace p0 only where the mask is set
    bit       v4.16b, v22.16b, v8.16b       //replace q0 only where the mask is set

    //---- restore the ld4 register layout and store ---------------------------
    mov       v1.d[0], v0.d[1]
    mov       v3.d[0], v2.d[1]
    mov       v5.d[0], v4.d[1]
    mov       v7.d[0], v6.d[1]
    // Inverse of the regrouping done after the loads (v10 is the temporary):
    //   v0..v3 = p1, p0, q0, q1 of rows 0-3; v4..v7 = the same for rows 4-7.
    mov       v10.16b, v1.16b
    mov       v1.16b, v2.16b
    mov       v2.16b, v4.16b
    mov       v4.16b, v10.16b
    mov       v10.16b, v3.16b
    mov       v3.16b, v6.16b
    mov       v6.16b, v5.16b
    mov       v5.16b, v10.16b

    st4       {v0.h, v1.h, v2.h, v3.h}[0], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[1], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[2], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[3], [x12], x1

    st4       {v4.h, v5.h, v6.h, v7.h}[0], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[1], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[2], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[3], [x12], x1

    //---- epilogue ------------------------------------------------------------
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret

