//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///*****************************************************************************/
///*                                                                           */
///*  File Name         : ih264_deblk_chroma_av8.s                              */
///*                                                                           */
///*  Description       : Contains function definitions for deblocking chroma  */
///*                      edge. Functions are coded in NEON assembly and can   */
///*                      be compiled using ARM RVDS.                          */
///*                                                                           */
///*  List of Functions : ih264_deblk_chroma_vert_bs4_av8()              */
///*                      ih264_deblk_chroma_vert_bslt4_av8()            */
///*                      ih264_deblk_chroma_horz_bs4_av8()              */
///*                      ih264_deblk_chroma_horz_bslt4_av8()            */
///*  Issues / Problems : None                                                 */
///*                                                                           */
///*  Revision History  :                                                      */
///*                                                                           */
///*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
///*         28 11 2013   Ittiam          Draft                                */
///*****************************************************************************/
398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
// Code section, aligned to 2^2 = 4 bytes (the width of one A64 instruction).
.text
.p2align 2
// Project macro file; presumably the source of the push_v_regs / pop_v_regs
// macros used by the routines in this file (not visible here - confirm).
.include "ih264_neon_macros.s"
448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
///**
//*******************************************************************************
//*
//* @brief
//*     Performs filtering of a chroma block horizontal edge when the
//*     boundary strength is set to 4 in high profile
//*
//* @par Description:
//*       This operation is described in  Sec. 8.7.2.4 under the title
//*       "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
//*
//*       Four rows of 16 bytes are read (p1, p0, q0, q1 at pu1_src - 2*strd,
//*       pu1_src - strd, pu1_src and pu1_src + strd). Each row is
//*       de-interleaved into 8 U (even bytes) and 8 V (odd bytes) samples.
//*       Only the p0 and q0 rows are written back:
//*           p0' = (2*p1 + p0 + q1 + 2) >> 2
//*           q0' = (2*q1 + q0 + p1 + 2) >> 2
//*       A sample keeps its original value when any of
//*           ABS(p0 - q0) >= alpha, ABS(q1 - q0) >= beta, ABS(p1 - p0) >= beta
//*       holds (alpha/beta taken from the _cb values for U, _cr for V).
//*
//* @param[in] x0 - pu1_src
//*  Pointer to the src sample q0
//*
//* @param[in] w1 - src_strd
//*  Source stride
//*
//* @param[in] w2 - alpha_cb
//*  Alpha Value for the boundary in U
//*
//* @param[in] w3 - beta_cb
//*  Beta Value for the boundary in U
//*
//* @param[in] w4 - alpha_cr
//*    Alpha Value for the boundary in V
//*
//* @param[in] w5 - beta_cr
//*    Beta Value for the boundary in V
//*
//* @returns
//*  None
//*
//* @remarks
//*  Modifies x0, x1, x4-x6 and v0-v11, v14, v16-v18, v20-v22, v24, v26, v28,
//*  v31. push_v_regs / pop_v_regs are macros defined outside this file;
//*  presumably they save and restore the callee-saved v8-v15 (confirm in
//*  ih264_neon_macros.s). No general-purpose callee-saved register is used.
//*
//*******************************************************************************
//*/

    .global ih264_deblk_chroma_horz_bs4_av8

ih264_deblk_chroma_horz_bs4_av8:

    push_v_regs                         //v8-v11 and v14 are written below
    sxtw      x1, w1                    //sign-extend the 32-bit stride for address arithmetic
    mov       x6, x5                    //x6 = beta_cr  (x4 is reused below as the store pointer,
    mov       x5, x4                    //x5 = alpha_cr  so the V thresholds move up one register)
    sub       x0, x0, x1, lsl #1        //x0 = pu1_src - 2*src_strd : row p1
    ld2       {v6.8b, v7.8b}, [x0], x1  //v6 = p1 (U) , v7 = p1 (V)
    mov       x4, x0                    //x4 = row p0, kept for the write-back
    ld2       {v4.8b, v5.8b}, [x0], x1  //v4 = p0 (U) , v5 = p0 (V)
    dup       v20.8b, w2                //v20 low half = alpha_cb
    dup       v21.8b, w5                //v21 low half = alpha_cr
    mov       v20.d[1], v21.d[0]        //v20 = {alpha_cb x 8 , alpha_cr x 8}
    ld2       {v0.8b, v1.8b}, [x0], x1  //v0 = q0 (U) , v1 = q0 (V)
    uaddl     v8.8h, v6.8b, v0.8b       //
    uaddl     v10.8h, v7.8b, v1.8b      //v8,v10 = p1 + q0   (U,V ; 16 bit)
    movi      v31.8b, #2                //constant 2 for the doubled taps
    ld2       {v2.8b, v3.8b}, [x0]      //v2 = q1 (U) , v3 = q1 (V)
    mov       v0.d[1], v1.d[0]          //pack U into the low and V into the high half so the
    mov       v2.d[1], v3.d[0]          //threshold tests run on all 16 samples at once; the
    mov       v4.d[1], v5.d[0]          //low halves of v1/v3/v5/v7 still hold V for the
    mov       v6.d[1], v7.d[0]          //widening arithmetic below
    uabd      v26.16b, v6.16b , v4.16b  //v26 = ABS(p1 - p0)
    umlal     v8.8h, v2.8b, v31.8b      //
    umlal     v10.8h, v3.8b, v31.8b     //v8,v10 = 2*q1 + q0 + p1
    uabd      v22.16b, v4.16b , v0.16b  //v22 = ABS(p0 - q0)
    uabd      v24.16b, v2.16b , v0.16b  //v24 = ABS(q1 - q0)
    uaddl     v14.8h, v4.8b, v2.8b      //
    uaddl     v28.8h, v5.8b, v3.8b      //v14,v28 = p0 + q1
    dup       v16.8b, w3                //v16 low half = beta_cb
    dup       v17.8b, w6                //v17 low half = beta_cr
    mov       v16.d[1], v17.d[0]        //v16 = {beta_cb x 8 , beta_cr x 8}
    umlal     v14.8h, v6.8b, v31.8b     //
    umlal     v28.8h, v7.8b, v31.8b     //v14,v28 = 2*p1 + p0 + q1
    cmhs      v18.16b, v22.16b, v20.16b //v18 = ABS(p0 - q0) >= alpha
    cmhs      v24.16b, v24.16b, v16.16b //v24 = ABS(q1 - q0) >= beta
    cmhs      v26.16b, v26.16b, v16.16b //v26 = ABS(p1 - p0) >= beta
    rshrn     v8.8b, v8.8h, #2          //
    rshrn     v9.8b, v10.8h, #2         //v8,v9 = (2*q1 + q0 + p1 + 2) >> 2   (v10 is free after this)
    mov       v8.d[1], v9.d[0]          //v8 = filtered q0 {U , V}
    orr       v18.16b, v18.16b , v24.16b //v18 = ( ABS(p0 - q0) >= alpha ) | ( ABS(q1 - q0) >= beta )
    rshrn     v10.8b, v14.8h, #2        //
    rshrn     v11.8b, v28.8h, #2        //v10,v11 = (2*p1 + p0 + q1 + 2) >> 2
    mov       v10.d[1], v11.d[0]        //v10 = filtered p0 {U , V}
    orr       v18.16b, v18.16b , v26.16b //v18 |= ( ABS(p1 - p0) >= beta ) : "leave unfiltered" mask
    bit       v10.16b, v4.16b , v18.16b //put the original p0 back where the mask is set
    bit       v8.16b, v0.16b , v18.16b  //put the original q0 back where the mask is set
    mov       v11.d[0], v10.d[1]        //split back into U (v10/v8) and V (v11/v9)
    mov       v9.d[0], v8.d[1]          //for the interleaving stores
    st2       {v10.8b, v11.8b}, [x4], x1 //write row p0
    st2       {v8.8b, v9.8b}, [x4]      //write row q0
    pop_v_regs
    ret
1428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
///**
//*******************************************************************************
//*
//* @brief
//*     Performs filtering of a chroma block vertical edge when the
//*     boundary strength is set to 4 in high profile
//*
//* @par Description:
//*       This operation is described in  Sec. 8.7.2.4 under the title
//*       "Filtering process for edges for bS equal to 4" in ITU T Rec H.264.
//*
//*       Eight rows are processed. From each row the 8 bytes starting at
//*       pu1_src - 4 are read as four 16-bit (U,V) pairs: p1, p0, q0, q1.
//*       All 8 bytes are written back; p1 and q1 are unchanged, while
//*           p0' = (2*p1 + p0 + q1 + 2) >> 2
//*           q0' = (2*q1 + q0 + p1 + 2) >> 2
//*       replace p0/q0 for every sample where
//*           ABS(p0 - q0) < alpha && ABS(q1 - q0) < beta && ABS(p1 - p0) < beta
//*       (alpha/beta taken from the _cb values for U bytes, _cr for V bytes).
//*
//* @param[in] x0 - pu1_src
//*  Pointer to the src sample q0
//*
//* @param[in] w1 - src_strd
//*  Source stride
//*
//* @param[in] w2 - alpha_cb
//*  Alpha Value for the boundary in U
//*
//* @param[in] w3 - beta_cb
//*  Beta Value for the boundary in U
//*
//* @param[in] w4 - alpha_cr
//*    Alpha Value for the boundary in V
//*
//* @param[in] w5 - beta_cr
//*    Beta Value for the boundary in V
//*
//* @returns
//*  None
//*
//* @remarks
//*  Modifies x0-x3, x12 and v0-v8, v10, v12, v14-v16, v18-v20, v22, v24, v31.
//*  push_v_regs / pop_v_regs are macros defined outside this file;
//*  presumably they save and restore the callee-saved v8-v15 (confirm in
//*  ih264_neon_macros.s). No general-purpose callee-saved register is used.
//*
//*******************************************************************************
//*/

    .global ih264_deblk_chroma_vert_bs4_av8

ih264_deblk_chroma_vert_bs4_av8:

    push_v_regs                         //v8, v10, v12, v14 and v15 are written below
    sxtw      x1, w1                    //sign-extend the 32-bit stride for address arithmetic

    sub       x0, x0, #4                //point x0 to p1u of row0.
    mov       x12, x0                   //keep a back up of x0 for buffer write

    add       w2, w2, w4, lsl #8        //w2 = alpha_cb + (alpha_cr << 8)
    add       w3, w3, w5, lsl #8        //w3 = beta_cb  + (beta_cr  << 8)

    //Rows 0-3: halfword lane i of v0/v1/v2/v3 receives the (U,V) pair of
    //p1/p0/q0/q1 from row i.
    ld4       {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[2], [x0], x1
    ld4       {v0.h, v1.h, v2.h, v3.h}[3], [x0], x1

    //Rows 4-7: same layout in v4/v5/v6/v7.
    ld4       {v4.h, v5.h, v6.h, v7.h}[0], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[1], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[2], [x0], x1
    ld4       {v4.h, v5.h, v6.h, v7.h}[3], [x0], x1

    //Regroup (v10 is scratch) so that each even/odd register pair holds one
    //tap: v0:v1 = p1, v2:v3 = p0, v4:v5 = q0, v6:v7 = q1, with the even
    //register carrying rows 0-3 and the odd register rows 4-7.
    mov       v10.16b, v2.16b
    mov       v2.16b, v1.16b
    mov       v1.16b, v4.16b
    mov       v4.16b, v10.16b
    mov       v10.16b, v6.16b
    mov       v6.16b, v3.16b
    mov       v3.16b, v5.16b
    mov       v5.16b, v10.16b

    dup       v22.8h, w2                //v22 = alpha, bytes alternate cb,cr like the pixel data
    dup       v24.8h, w3                //v24 = beta,  bytes alternate cb,cr
    movi      v31.8b, #2                //constant 2 for the doubled taps

    //Merge rows 4-7 into the high halves: v0 = p1, v2 = p0, v4 = q0, v6 = q1
    //for all 8 rows. The low halves of v1/v3/v5/v7 still hold rows 4-7 and
    //feed the widening arithmetic below.
    mov       v0.d[1], v1.d[0]
    mov       v2.d[1], v3.d[0]
    mov       v4.d[1], v5.d[0]
    mov       v6.d[1], v7.d[0]

    uabd      v8.16b, v2.16b , v4.16b   //|p0-q0|
    uabd      v10.16b, v6.16b , v4.16b  //|q1-q0|
    uabd      v12.16b, v0.16b , v2.16b  //|p1-p0|
    uaddl     v14.8h, v2.8b, v6.8b
    uaddl     v16.8h, v3.8b, v7.8b      //(p0 + q1)            rows 0-3 , rows 4-7
    cmhi      v8.16b, v22.16b , v8.16b  //|p0-q0| < alpha ?
    cmhi      v10.16b, v24.16b , v10.16b //|q1-q0| < beta ?
    cmhi      v12.16b, v24.16b , v12.16b //|p1-p0| < beta ?
    umlal     v14.8h, v0.8b, v31.8b
    umlal     v16.8h, v1.8b, v31.8b     //2*p1 + (p0 + q1)
    uaddl     v18.8h, v0.8b, v4.8b
    uaddl     v20.8h, v1.8b, v5.8b      //(p1 + q0)
    and       v8.16b, v8.16b , v10.16b  //|p0-q0| < alpha && |q1-q0| < beta
    umlal     v18.8h, v6.8b, v31.8b
    umlal     v20.8h, v7.8b, v31.8b     //2*q1 + (p1 + q0)

    rshrn     v14.8b, v14.8h, #2
    rshrn     v15.8b, v16.8h, #2        //(2*p1 + (p0 + q1) + 2) >> 2
    mov       v14.d[1], v15.d[0]        //v14 = filtered p0, all 8 rows
    and       v8.16b, v8.16b , v12.16b  //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
    rshrn     v18.8b, v18.8h, #2
    rshrn     v19.8b, v20.8h, #2        //(2*q1 + (p1 + q0) + 2) >> 2
    mov       v18.d[1], v19.d[0]        //v18 = filtered q0, all 8 rows
    bit       v2.16b, v14.16b , v8.16b  //take filtered p0 where the mask (v8) is set
    bit       v4.16b, v18.16b , v8.16b  //take filtered q0 where the mask (v8) is set

    //Split the 8-row registers again: odd register = rows 4-7.
    mov       v1.d[0], v0.d[1]
    mov       v3.d[0], v2.d[1]
    mov       v5.d[0], v4.d[1]
    mov       v7.d[0], v6.d[1]

    //Undo the regrouping (v10 is scratch): v0-v3 = p1,p0,q0,q1 of rows 0-3
    //and v4-v7 = p1,p0,q0,q1 of rows 4-7, the layout st4 needs.
    mov       v10.16b, v1.16b
    mov       v1.16b, v2.16b
    mov       v2.16b, v4.16b
    mov       v4.16b, v10.16b
    mov       v10.16b, v3.16b
    mov       v3.16b, v6.16b
    mov       v6.16b, v5.16b
    mov       v5.16b, v10.16b

    //Write rows 0-3 back, starting at the address saved in x12.
    st4       {v0.h, v1.h, v2.h, v3.h}[0], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[1], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[2], [x12], x1
    st4       {v0.h, v1.h, v2.h, v3.h}[3], [x12], x1

    //Write rows 4-7 back.
    st4       {v4.h, v5.h, v6.h, v7.h}[0], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[1], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[2], [x12], x1
    st4       {v4.h, v5.h, v6.h, v7.h}[3], [x12], x1

    pop_v_regs
    ret
2808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
///**
//*******************************************************************************
//*
//* @brief
//*     Performs filtering of a chroma block horizontal edge for cases where the
//*     boundary strength is less than 4 in high profile
//*
//* @par Description:
//*       This operation is described in  Sec. 8.7.2.3 under the title
//*       "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
//*
//* @param[in] x0 - pu1_src
//*  Pointer to the src sample q0
//*
//* @param[in] w1 - src_strd
//*  Source stride
//*
//* @param[in] w2 - alpha_cb
//*  Alpha Value for the boundary in U
//*
//* @param[in] w3 - beta_cb
//*  Beta Value for the boundary in U
//*
//* @param[in] w4 - alpha_cr
//*    Alpha Value for the boundary in V
//*
//* @param[in] w5 - beta_cr
//*    Beta Value for the boundary in V
//*
//* @param[in] w6 - u4_bs
//*    Packed Boundary strength array
//*
//* @param[in] x7 - pu1_cliptab_cb
//*    tc0_table for U
//*
//* @param[in] sp(0) - pu1_cliptab_cr
//*    tc0_table for V
//*
//* @returns
//*  None
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
3298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    .global ih264_deblk_chroma_horz_bslt4_av8
3318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Sih264_deblk_chroma_horz_bslt4_av8:
3338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    // STMFD sp!,{x4-x9,x14}        //
3358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    push_v_regs
3368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    stp       x19, x20, [sp, #-16]!
337d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo    sxtw      x1, w1
338f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo    ldr       x8, [sp, #80]
3398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub       x0, x0, x1, lsl #1        //x0 = uc_edgePixelU pointing to p1 of chroma U
340f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo    rev       w6, w6                    //
341f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo    mov       v12.s[0], w6              //D12[0] = ui_Bs
342f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo    ld1       {v16.s}[0], [x7]          //D16[0] contains cliptab_cb
343f1c718c2673317b3bea0a48f21fdaa4e5caf228fMartin Storsjo    ld1       {v17.s}[0], [x8]          //D17[0] contains cliptab_cr
3448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld2       {v6.8b, v7.8b}, [x0], x1  //Q3=p1
3458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    tbl       v14.8b, {v16.16b}, v12.8b //Retreiving cliptab values for U
3468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    tbl       v28.8b, {v17.16b}, v12.8b //Retrieving cliptab values for V
3478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uxtl      v12.8h, v12.8b            //Q6 = uc_Bs in each 16 bit scalar
3488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       x6, x0                    //Keeping a backup of the pointer to chroma U P0
3498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld2       {v4.8b, v5.8b}, [x0], x1  //Q2=p0
3508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    movi      v30.8b, #1                //
3518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dup       v20.8b, w2                //D20 contains alpha_cb
3528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dup       v21.8b, w4                //D21 contains alpha_cr
3538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v20.d[1], v21.d[0]
3548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld2       {v0.8b, v1.8b}, [x0], x1  //Q0=q0
3558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uxtl      v14.8h, v14.8b            //
3568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uxtl      v28.8h, v28.8b            //
3578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v15.d[0], v28.d[0]        //D14 has cliptab values for U, D15 for V
3588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v14.d[1], v28.d[0]
3598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld2       {v2.8b, v3.8b}, [x0]      //Q1=q1
3608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    usubl     v10.8h, v1.8b, v5.8b      //
3618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    usubl     v8.8h, v0.8b, v4.8b       //Q5,Q4 = (q0 - p0)
3628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v6.d[1], v7.d[0]
3638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v4.d[1], v5.d[0]
3648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uabd      v26.16b, v6.16b , v4.16b  //Q13 = ABS(p1 - p0)
3658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    shl       v10.8h, v10.8h, #2        //Q5 = (q0 - p0)<<2
3668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v0.d[1], v1.d[0]
3678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uabd      v22.16b, v4.16b , v0.16b  //Q11 = ABS(p0 - q0)
3688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    shl       v8.8h, v8.8h, #2          //Q4 = (q0 - p0)<<2
3698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v14.d[1], v15.d[0]
3708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sli       v14.8h, v14.8h, #8
3718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v15.d[0], v14.d[1]
3728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v2.d[1], v3.d[0]
3738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uabd      v24.16b, v2.16b , v0.16b  //Q12 = ABS(q1 - q0)
3748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmhs      v18.16b, v22.16b, v20.16b
3758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    usubl     v20.8h, v6.8b, v2.8b      //Q10 = (p1 - q1)L
3768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    usubl     v6.8h, v7.8b, v3.8b       //Q3 = (p1 - q1)H
3778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dup       v16.8b, w3                //Q8 contains beta_cb
3788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dup       v17.8b, w5                //Q8 contains beta_cr
3798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v16.d[1], v17.d[0]
3808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add       v8.8h, v8.8h , v20.8h     //
3818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add       v10.8h, v10.8h , v6.8h    //Q5,Q4 = [ (q0 - p0)<<2 ] + (p1 - q1)
3828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmhs      v24.16b, v24.16b, v16.16b
3838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmgt      v12.4h, v12.4h, #0
3848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sqrshrn   v8.8b, v8.8h, #3          //
3858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sqrshrn   v9.8b, v10.8h, #3         //Q4 = i_macro = (((q0 - p0)<<2) + (p1 - q1) + 4)>>3
3868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v8.d[1], v9.d[0]
3878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add       v14.8b, v14.8b , v30.8b   //D14 = C = C0+1 for U
3888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmhs      v26.16b, v26.16b, v16.16b
3898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    orr       v18.16b, v18.16b , v24.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta )
3908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    abs       v6.16b, v8.16b            //Q4 = ABS (i_macro)
3918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add       v15.8b, v15.8b , v30.8b   //D15 = C = C0+1 for V
3928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v14.d[1], v15.d[0]
3938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v13.8b, v12.8b
3948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v12.d[1], v13.d[0]        //
3958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    orr       v18.16b, v18.16b , v26.16b //Q9 = ( ABS(p0 - q0) >= Alpha ) | ( ABS(q1 - q0) >= Beta ) | ( ABS(p1 - p0) >= Beta )
3968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    umin      v14.16b, v6.16b , v14.16b //Q7 = delta = (ABS(i_macro) > C) ? C : ABS(i_macro)
3978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bic       v12.16b, v12.16b , v18.16b //final condition
3988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmge      v8.16b, v8.16b, #0
3998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    and       v14.16b, v14.16b , v12.16b //Making delta zero in places where values shouldn be filterd
4008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uqadd     v16.16b, v4.16b , v14.16b //Q8 = p0 + delta
4018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uqsub     v4.16b, v4.16b , v14.16b  //Q2 = p0 - delta
4028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uqadd     v18.16b, v0.16b , v14.16b //Q9 = q0 + delta
4038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uqsub     v0.16b, v0.16b , v14.16b  //Q0 = q0 - delta
4048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bif       v16.16b, v4.16b , v8.16b  //Q8 = (i_macro >= 0 ) ? (p0+delta) : (p0-delta)
4058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bif       v0.16b, v18.16b , v8.16b  //Q0 = (i_macro >= 0 ) ? (q0-delta) : (q0+delta)
4068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v17.d[0], v16.d[1]
4078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v1.d[0], v0.d[1]
4088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st2       {v16.8b, v17.8b}, [x6], x1 //
4098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st2       {v0.8b, v1.8b}, [x6]      //
4108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldp       x19, x20, [sp], #16
4128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pop_v_regs
4138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ret
4148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S///**
4198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*******************************************************************************
4208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
4218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @brief
4228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*     Performs filtering of a chroma block vertical edge for cases where the
4238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*     boundary strength is less than 4 in high profile
4248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
4258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @par Description:
4268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*       This operation is described in  Sec. 8.7.2.3 under the title
4278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*       "Filtering process for edges with bS less than 4" in ITU T Rec H.264.
4288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
4298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @param[in] x0 - pu1_src
4308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*  Pointer to the src sample q0
4318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
432d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w1 - src_strd
4338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*  Source stride
4348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
435d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w2 - alpha_cb
4368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*  Alpha Value for the boundary in U
4378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
438d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w3 - beta_cb
4398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*  Beta Value for the boundary in U
4408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
441d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w4 - alpha_cr
4428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*    Alpha Value for the boundary in V
4438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
444d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w5 - beta_cr
4458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*    Beta Value for the boundary in V
4468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
447d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] w6 - u4_bs
4488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*    Packed Boundary strength array
4498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
450d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] x7 - pu1_cliptab_cb
4518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*    tc0_table for U
4528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
453d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo//* @param[in] sp(0) - pu1_cliptab_cr
4548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*    tc0_table for V
4558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
4568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @returns
4578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*  None
4588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
4598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* @remarks
4608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*  None
4618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
4628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*******************************************************************************
4638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*/
4648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    .global ih264_deblk_chroma_vert_bslt4_av8
4668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Sih264_deblk_chroma_vert_bslt4_av8:
4688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    // Filters 8 rows across a vertical chroma edge; every row holds p1,p0,q0,q1 as interleaved U,V byte pairs.
4698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    // Leftover note, apparently from a 32-bit ARM version: STMFD sp!,{x4-x7,x10-x12,x14}
4708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    push_v_regs                         //macro defined outside this chunk; presumably pushes 64 bytes of callee-saved SIMD regs (inferred from #80 below) - TODO confirm
4718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    stp       x19, x20, [sp, #-16]!     //save x19/x20 (16 bytes); NOTE(review): neither register is used before the restore at the end
472d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo    sxtw      x1, w1                    //sign-extend the 32-bit src_strd so it can be used as a 64-bit post-index
4738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       x10, x7                   //x10 = pu1_cliptab_cb
4741b025fff7c9d8bc5692db1a2359ea1c9e4075cd5Martin Storsjo    ldr       x11, [sp, #80]            //x11 = pu1_cliptab_cr (stack argument; #80 skips the 16 bytes pushed above plus whatever push_v_regs pushed)
4758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub       x0, x0, #4                //x0 = pu1_src - 4: p1u of row0 (two U,V pairs to the left of q0)
476d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo    add       w2, w2, w4, lsl #8        //w2 = alpha_cb + (alpha_cr << 8): U alpha in byte 0, V alpha in byte 1 (assumes alpha_cb < 256)
477d91f49ad65795b8d3223f1aba481bf3931b291e6Martin Storsjo    add       w3, w3, w5, lsl #8        //w3 = beta_cb + (beta_cr << 8): same packing for beta (assumes beta_cb < 256)
4788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       x12, x0                   //keep a back up of x0 for buffer write
4798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld4       {v0.h, v1.h, v2.h, v3.h}[0], [x0], x1 //row 0: v0=p1, v1=p0, v2=q0, v3=q1 (one U,V halfword each)
4808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld4       {v0.h, v1.h, v2.h, v3.h}[1], [x0], x1 //row 1
4818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld4       {v0.h, v1.h, v2.h, v3.h}[2], [x0], x1 //row 2
4828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld4       {v0.h, v1.h, v2.h, v3.h}[3], [x0], x1 //row 3
4838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld4       {v4.h, v5.h, v6.h, v7.h}[0], [x0], x1 //row 4: v4=p1, v5=p0, v6=q0, v7=q1
4858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld4       {v4.h, v5.h, v6.h, v7.h}[1], [x0], x1 //row 5
4868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld4       {v4.h, v5.h, v6.h, v7.h}[2], [x0], x1 //row 6
4878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld4       {v4.h, v5.h, v6.h, v7.h}[3], [x0], x1 //row 7
4888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    // Regroup so that (v0,v1)=p1, (v2,v3)=p0, (v4,v5)=q0, (v6,v7)=q1 with rows 0-3 in the even and rows 4-7 in the odd register
4898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v10.16b, v2.16b           //v10 = q0 rows 0-3 (temporary)
4908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v2.16b, v1.16b            //v2 = p0 rows 0-3
4918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v1.16b, v4.16b            //v1 = p1 rows 4-7
4928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v4.16b, v10.16b           //v4 = q0 rows 0-3
4938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v10.16b, v6.16b           //v10 = q0 rows 4-7 (temporary)
4948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v6.16b, v3.16b            //v6 = q1 rows 0-3
4958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v3.16b, v5.16b            //v3 = p0 rows 4-7
4968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v5.16b, v10.16b           //v5 = q0 rows 4-7
4978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dup       v22.8h, w2                //v22 = alpha, (cb,cr) byte pair replicated in every halfword
4988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v2.d[1], v3.d[0]          //v2 = p0 rows 0-7
4998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v4.d[1], v5.d[0]          //v4 = q0 rows 0-7
5008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uabd      v8.16b, v2.16b , v4.16b   //|p0-q0|
5018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dup       v24.8h, w3                //v24 = beta, (cb,cr) byte pair replicated in every halfword
5028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v25.d[0], v24.d[1]        //NOTE(review): looks redundant - v25.s[0] is overwritten by the V clip table load below
5038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v6.d[1], v7.d[0]          //v6 = q1 rows 0-7
5048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v0.d[1], v1.d[0]          //v0 = p1 rows 0-7
5058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uabd      v10.16b, v6.16b , v4.16b  //|q1-q0|
5068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uabd      v12.16b, v0.16b , v2.16b  //|p1-p0|
5078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmhi      v8.16b, v22.16b , v8.16b  //|p0-q0| < alpha ?
5088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    usubl     v14.8h, v0.8b, v6.8b      //(p1 - q1) for rows 0-3, widened to 16 bit
5098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmhi      v10.16b, v24.16b , v10.16b //|q1-q0| < beta ?
5108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    usubl     v16.8h, v1.8b, v7.8b      //(p1 - q1) for rows 4-7
5118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmhi      v12.16b, v24.16b , v12.16b //|p1-p0| < beta ? (last use of beta in v24)
5128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    usubl     v18.8h, v4.8b, v2.8b      //(q0 - p0) for rows 0-3
5138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    and       v8.16b, v8.16b , v10.16b  //|p0-q0| < alpha && |q1-q0| < beta
5148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    usubl     v20.8h, v5.8b, v3.8b      //(q0 - p0) for rows 4-7
5158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    movi      v28.8h, #4                //multiplier 4 for the mla below
5168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld1       {v24.s}[0], [x10]         //first 4 bytes of the U clip table into v24.s[0] (v24 is reused; beta is no longer needed)
5178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ld1       {v25.s}[0], [x11]         //first 4 bytes of the V clip table into v25.s[0]
5188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    rev       w6, w6                    //byte-reverse packed u4_bs: its most significant byte becomes byte 0
5198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    and       v8.16b, v8.16b , v12.16b  //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta
5208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v10.s[0], w6              //v10.b[0..3] = the four bS bytes
5218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mla       v14.8h, v18.8h , v28.8h   //4*(q0 - p0) + (p1 - q1), rows 0-3
5228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mla       v16.8h, v20.8h , v28.8h   //4*(q0 - p0) + (p1 - q1), rows 4-7
5238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uxtl      v10.8h, v10.8b            //widen each bS byte to a halfword
5248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sli       v10.4h, v10.4h, #8        //copy bS into the upper byte too: low 8 bytes = bS0,bS0,bS1,bS1,... (one per row)
5258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    tbl       v12.8b, {v24.16b}, v10.8b //tC0 for U
5268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    tbl       v13.8b, {v25.16b}, v10.8b //tC0 for V
5278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zip1      v31.8b, v12.8b, v13.8b    //interleave U,V tC0 for rows 0-3
5288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zip2      v13.8b, v12.8b, v13.8b    //interleave U,V tC0 for rows 4-7
5298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v12.8b, v31.8b            //v12 low half = tC0 rows 0-3
5308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v12.d[1], v13.d[0]        //v12 = tC0 for rows 0-7 in the same U,V byte layout as v2/v4
5318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uxtl      v10.4s, v10.4h            //widen each duplicated bS halfword to a word
5328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sli       v10.4s, v10.4s, #16       //each word = 4 copies of one bS: covers U,V of two consecutive rows
5338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    movi      v24.16b, #1               //constant 1 in every byte (v24 reused again)
5348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add       v12.16b, v12.16b , v24.16b //tC0 + 1
5358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmhs      v10.16b, v10.16b , v24.16b //bS >= 1 ? (i.e. bS != 0)
5368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    and       v8.16b, v8.16b , v10.16b  //|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0
5378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    // v0, v2, v4, v6 = p1, p0, q0, q1 for rows 0-7 (inputs),
5388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    // v8 = filter mask (|p0-q0| < alpha && |q1-q0| < beta && |p1-p0| < beta && u4_bs != 0),
5398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    // v12 = tC (tC0 + 1)
5408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    srshr     v14.8h, v14.8h, #3        //rounding shift, rows 0-3
5418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    srshr     v16.8h, v16.8h, #3        //(((q0 - p0) << 2) + (p1 - q1) + 4) >> 3)
5428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmgt      v18.8h, v14.8h , #0       //delta > 0 ? rows 0-3
5438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmgt      v20.8h, v16.8h , #0       //delta > 0 ? rows 4-7
5448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    xtn       v18.8b, v18.8h            //narrow the rows 0-3 mask to bytes
5458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    xtn       v19.8b, v20.8h            //narrow the rows 4-7 mask to bytes
5468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v18.d[1], v19.d[0]        //v18 = (delta > 0) byte mask for rows 0-7
5478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    abs       v14.8h, v14.8h            //|delta| rows 0-3
5488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    abs       v16.8h, v16.8h            //|delta| rows 4-7
5498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    xtn       v14.8b, v14.8h            //narrow |delta| to bytes, rows 0-3
5508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    xtn       v15.8b, v16.8h            //narrow |delta| to bytes, rows 4-7
5518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v14.d[1], v15.d[0]        //v14 = |delta| for rows 0-7
5528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    umin      v14.16b, v14.16b , v12.16b //v14 = min(|delta|, tC)
5538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uqadd     v20.16b, v2.16b , v14.16b //p0+|delta|
5548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uqadd     v22.16b, v4.16b , v14.16b //q0+|delta|
5558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uqsub     v24.16b, v2.16b , v14.16b //p0-|delta|
5568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    uqsub     v26.16b, v4.16b , v14.16b //q0-|delta|
5578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bit       v24.16b, v20.16b , v18.16b //p0 + delta: take p0+|delta| where delta > 0, else keep p0-|delta|
5588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bit       v22.16b, v26.16b , v18.16b //q0 - delta: take q0-|delta| where delta > 0, else keep q0+|delta|
5598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bit       v2.16b, v24.16b , v8.16b  //replace p0 only where the filter mask is set
5608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bit       v4.16b, v22.16b , v8.16b  //replace q0 only where the filter mask is set
5618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v1.d[0], v0.d[1]          //v1 = p1 rows 4-7
5628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v3.d[0], v2.d[1]          //v3 = filtered p0 rows 4-7
5638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v5.d[0], v4.d[1]          //v5 = filtered q0 rows 4-7
5648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v7.d[0], v6.d[1]          //v7 = q1 rows 4-7
5658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v10.16b, v1.16b           //undo the regrouping: v10 = p1 rows 4-7 (temporary)
5668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v1.16b, v2.16b            //v1 = p0 rows 0-3
5678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v2.16b, v4.16b            //v2 = q0 rows 0-3
5688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v4.16b, v10.16b           //v4 = p1 rows 4-7
5698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v10.16b, v3.16b           //v10 = p0 rows 4-7 (temporary)
5708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v3.16b, v6.16b            //v3 = q1 rows 0-3
5718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v6.16b, v5.16b            //v6 = q0 rows 4-7
5728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov       v5.16b, v10.16b           //v5 = p0 rows 4-7
5738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st4       {v0.h, v1.h, v2.h, v3.h}[0], [x12], x1 //write back row 0: p1, p0, q0, q1
5748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st4       {v0.h, v1.h, v2.h, v3.h}[1], [x12], x1 //row 1
5758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st4       {v0.h, v1.h, v2.h, v3.h}[2], [x12], x1 //row 2
5768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st4       {v0.h, v1.h, v2.h, v3.h}[3], [x12], x1 //row 3
5778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st4       {v4.h, v5.h, v6.h, v7.h}[0], [x12], x1 //write back row 4: p1, p0, q0, q1
5798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st4       {v4.h, v5.h, v6.h, v7.h}[1], [x12], x1 //row 5
5808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st4       {v4.h, v5.h, v6.h, v7.h}[2], [x12], x1 //row 6
5818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    st4       {v4.h, v5.h, v6.h, v7.h}[3], [x12], x1 //row 7
5828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldp       x19, x20, [sp], #16       //restore x19/x20 saved on entry
5848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pop_v_regs                          //macro defined outside this chunk; presumably the inverse of push_v_regs - TODO confirm
5858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ret
5868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
588