18d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//******************************************************************************
28d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
38d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Copyright (C) 2015 The Android Open Source Project
48d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
58d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Licensed under the Apache License, Version 2.0 (the "License");
68d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* you may not use this file except in compliance with the License.
78d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* You may obtain a copy of the License at:
88d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
98d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* http://www.apache.org/licenses/LICENSE-2.0
108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Unless required by applicable law or agreed to in writing, software
128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* distributed under the License is distributed on an "AS IS" BASIS,
138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* See the License for the specific language governing permissions and
158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* limitations under the License.
168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*
178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*****************************************************************************
188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*/
208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S///**
218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *******************************************************************************
228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// * @file
238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *  ih264_mem_fns_neon.s
248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *
258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// * @brief
268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *  Contains function definitions for memory manipulation
278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *
288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// * @author
298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *     Naveen SR
308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *
// * @par List of Functions:
// *  - ih264_memcpy_av8()
// *  - ih264_memcpy_mul_8_av8()
// *  - ih264_memset_av8()
// *  - ih264_memset_mul_8_av8()
// *  - ih264_memset_16bit_mul_8_av8()
// *  - ih264_memset_16bit_av8()
378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *
388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// * @remarks
398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *  None
408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *
418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S// *******************************************************************************
428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S//*/
438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S.text
458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S.p2align 2
468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S.include "ih264_neon_macros.s"
///**
//*******************************************************************************
//*
//* @brief
//*   memcpy of a 1d array whose length is a multiple of 8 bytes
//*
//* @par Description:
//*   Copies num_bytes bytes of 8bit data from source to destination, one
//*   8 byte NEON load/store pair per iteration. Intended for 8, 16 or 32
//*   bytes; any other non-zero multiple of 8 is handled by the same loop.
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy, expected to be a multiple of 8.
//*  0 copies nothing.
//*
//* @returns
//*  None
//*
//* @remarks
//*  Modifies x0, x1, x2, v0 and the condition flags.
//*  The loop stops as soon as the remaining count reaches zero or borrows,
//*  so a count that is not a multiple of 8 terminates after the chunk that
//*  covers the last byte (up to 7 bytes beyond num_bytes are then copied);
//*  callers must still honour the multiple-of-8 contract.
//*  NOTE(review): all 64 bits of x2 are used as the count although the
//*  prototype below declares UWORD8 - confirm that callers pass a
//*  zero-extended value.
//*
//*******************************************************************************
//*/
//void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
//                      UWORD8 *pu1_src,
//                      UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst   (advanced by 8 per iteration)
//    x1 => *pu1_src   (advanced by 8 per iteration)
//    x2 => num_bytes  (bytes still to be copied)
//    v0 => 8 byte chunk being moved

    .global ih264_memcpy_mul_8_av8

ih264_memcpy_mul_8_av8:
    // Nothing to do for a zero length; without this check the counter
    // would wrap below zero and the loop would never see it reach 0.
    cbz       x2, end_memcpy_mul_8

loop_neon_memcpy_mul_8:
    // Memcpy 8 bytes
    ld1       {v0.8b}, [x1], #8
    st1       {v0.8b}, [x0], #8

    subs      x2, x2, #8
    // Continue only while bytes remain: HI = no borrow and result != 0
    bhi       loop_neon_memcpy_mul_8

end_memcpy_mul_8:
    ret
958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
///**
//*******************************************************************************
//*
//* @brief
//*   memcpy of a 1d array of arbitrary length
//*
//* @par Description:
//*   Copies num_bytes bytes of 8bit data from source to destination. Full
//*   8 byte chunks are moved with a NEON load/store pair; the remaining
//*   0..7 bytes are moved one at a time.
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy. 0 copies nothing.
//*
//* @returns
//*  None
//*
//* @remarks
//*  Modifies x0, x1, x2, w3, v0 and the condition flags.
//*  The count is compared as a signed 64 bit value (blt/bge).
//*  NOTE(review): all 64 bits of x2 are used as the count although the
//*  prototype below declares UWORD8 - confirm that callers pass a
//*  zero-extended value.
//*
//*******************************************************************************
//*/
//void ih264_memcpy(UWORD8 *pu1_dst,
//                  UWORD8 *pu1_src,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => *pu1_src
//    x2 => num_bytes  (kept biased by -8 while the NEON loop runs)
//    w3 => byte being moved in the tail loop
//    v0 => 8 byte chunk being moved in the NEON loop



    .global ih264_memcpy_av8

ih264_memcpy_av8:
    subs      x2, x2, #8                  // x2 = num_bytes - 8
    blt       arm_memcpy                  // fewer than 8 bytes: tail only
loop_neon_memcpy:
    // Memcpy 8 bytes
    ld1       {v0.8b}, [x1], #8
    st1       {v0.8b}, [x0], #8

    subs      x2, x2, #8
    bge       loop_neon_memcpy            // at least one more full chunk

arm_memcpy:
    // Remove the -8 bias: x2 = bytes left over (0..7). Checking for zero
    // here covers both "length was a multiple of 8" and "length was 0";
    // entering the byte loop with 0 would wrap the counter.
    adds      x2, x2, #8
    beq       end_func1

loop_arm_memcpy:
    ldrb      w3, [x1], #1
    strb      w3, [x0], #1
    subs      x2, x2, #1
    bne       loop_arm_memcpy

end_func1:
    ret
1388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
///**
//*******************************************************************************
//*
//* @brief
//*   memset of a 1d array whose length is a multiple of 8 bytes
//*
//* @par Description:
//*   Fills num_bytes bytes at the destination with the 8bit value, one
//*   8 byte NEON store per iteration.
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] value
//*  byte value to store (low 8 bits of w1 are replicated)
//*
//* @param[in] num_bytes
//*  number of bytes to set, expected to be a multiple of 8.
//*  0 stores nothing.
//*
//* @returns
//*  None
//*
//* @remarks
//*  Modifies x0, x2, v0 and the condition flags.
//*  The loop stops as soon as the remaining count reaches zero or borrows,
//*  so a count that is not a multiple of 8 terminates after the chunk that
//*  covers the last byte (up to 7 bytes beyond num_bytes are then written);
//*  callers must still honour the multiple-of-8 contract.
//*  NOTE(review): all 64 bits of x2 are used as the count although the
//*  prototype below declares UWORD8 - confirm that callers pass a
//*  zero-extended value.
//*
//*******************************************************************************
//*/
//void ih264_memset_mul_8(UWORD8 *pu1_dst,
//                       UWORD8 value,
//                       UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst   (advanced by 8 per iteration)
//    x1 => value
//    x2 => num_bytes  (bytes still to be written)
//    v0 => value replicated into 8 byte lanes


    .global ih264_memset_mul_8_av8

ih264_memset_mul_8_av8:

// Assumptions: numbytes is either 8, 16 or 32
    // Nothing to do for a zero length; without this check the counter
    // would wrap below zero and the loop would never see it reach 0.
    cbz       x2, end_memset_mul_8

    dup       v0.8b, w1
loop_memset_mul_8:
    // Memset 8 bytes
    st1       {v0.8b}, [x0], #8

    subs      x2, x2, #8
    // Continue only while bytes remain: HI = no borrow and result != 0
    bhi       loop_memset_mul_8

end_memset_mul_8:
    ret
1638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
///**
//*******************************************************************************
//*
//* @brief
//*   memset of a 1d array of arbitrary length
//*
//* @par Description:
//*   Fills num_bytes bytes at the destination with the 8bit value. Full
//*   8 byte chunks are written with a NEON store; the remaining 0..7 bytes
//*   are written one at a time.
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] value
//*  byte value to store (only the low 8 bits of w1 are written)
//*
//* @param[in] num_bytes
//*  number of bytes to set. 0 stores nothing.
//*
//* @returns
//*  None
//*
//* @remarks
//*  Modifies x0, x2, the condition flags and, when at least 8 bytes are
//*  written, v0.
//*  The count is compared as a signed 64 bit value (blt/bge).
//*  NOTE(review): all 64 bits of x2 are used as the count although the
//*  prototype below declares UWORD8 - confirm that callers pass a
//*  zero-extended value.
//*
//*******************************************************************************
//*/
//void ih264_memset(UWORD8 *pu1_dst,
//                  UWORD8 value,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => value
//    x2 => num_bytes  (kept biased by -8 while the NEON loop runs)
//    v0 => value replicated into 8 byte lanes



    .global ih264_memset_av8

ih264_memset_av8:
    subs      x2, x2, #8                  // x2 = num_bytes - 8
    blt       arm_memset                  // fewer than 8 bytes: tail only
    dup       v0.8b, w1
loop_neon_memset:
    // Memset 8 bytes
    st1       {v0.8b}, [x0], #8

    subs      x2, x2, #8
    bge       loop_neon_memset            // at least one more full chunk

arm_memset:
    // Remove the -8 bias: x2 = bytes left over (0..7). Checking for zero
    // here covers both "length was a multiple of 8" and "length was 0";
    // entering the byte loop with 0 would wrap the counter.
    adds      x2, x2, #8
    beq       end_func2

loop_arm_memset:
    strb      w1, [x0], #1
    subs      x2, x2, #1
    bne       loop_arm_memset

end_func2:
    ret
2018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
///**
//*******************************************************************************
//*
//* @brief
//*   memset of a 1d array of 16bit words whose length is a multiple of 8
//*
//* @par Description:
//*   Fills num_words 16bit words at the destination with the 16bit value,
//*   8 words (16 bytes) per iteration with a single NEON store.
//*
//* @param[in] pu2_dst
//*  UWORD16 pointer to the destination
//*
//* @param[in] value
//*  16bit value to store (low 16 bits of w1 are replicated)
//*
//* @param[in] num_words
//*  number of 16bit words to set, expected to be a multiple of 8.
//*  0 stores nothing.
//*
//* @returns
//*  None
//*
//* @remarks
//*  Modifies x0, x2, v0 and the condition flags.
//*  The loop stops as soon as the remaining count reaches zero or borrows,
//*  so a count that is not a multiple of 8 terminates after the chunk that
//*  covers the last word (up to 7 words beyond num_words are then written);
//*  callers must still honour the multiple-of-8 contract.
//*  NOTE(review): all 64 bits of x2 are used as the count although the
//*  prototype below declares UWORD8 - confirm that callers pass a
//*  zero-extended value.
//*
//*******************************************************************************
//*/
//void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
//                                      UWORD16 value,
//                                      UWORD8 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst   (advanced by 16 bytes per iteration)
//    x1 => value
//    x2 => num_words  (words still to be written)
//    v0 => value replicated into 8 halfword lanes


    .global ih264_memset_16bit_mul_8_av8

ih264_memset_16bit_mul_8_av8:

// Assumptions: num_words is either 8, 16 or 32

    // Nothing to do for a zero length; without this check the counter
    // would wrap below zero and the loop would never see it reach 0.
    cbz       x2, end_memset_16bit_mul_8

    dup       v0.8h, w1
loop_memset_16bit_mul_8:
    // Memset 8 words with one 128 bit store
    st1       {v0.8h}, [x0], #16

    subs      x2, x2, #8
    // Continue only while words remain: HI = no borrow and result != 0
    bhi       loop_memset_16bit_mul_8

end_memset_16bit_mul_8:
    ret
2318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
///**
//*******************************************************************************
//*
//* @brief
//*   memset of a 1d array of 16bit words of arbitrary length
//*
//* @par Description:
//*   Fills num_words 16bit words at the destination with the 16bit value.
//*   Full groups of 8 words are written with a single NEON store; the
//*   remaining 0..7 words are written one at a time.
//*
//* @param[in] pu2_dst
//*  UWORD16 pointer to the destination
//*
//* @param[in] value
//*  16bit value to store (only the low 16 bits of w1 are written)
//*
//* @param[in] num_words
//*  number of 16bit words to set. 0 stores nothing.
//*
//* @returns
//*  None
//*
//* @remarks
//*  Modifies x0, x2, the condition flags and, when at least 8 words are
//*  written, v0.
//*  The count is compared as a signed 64 bit value (blt/bge).
//*  NOTE(review): all 64 bits of x2 are used as the count although the
//*  prototype below declares UWORD8 - confirm that callers pass a
//*  zero-extended value.
//*
//*******************************************************************************
//*/
//void ih264_memset_16bit(UWORD16 *pu2_dst,
//                       UWORD16 value,
//                       UWORD8 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    x1 => value
//    x2 => num_words  (kept biased by -8 while the NEON loop runs)
//    v0 => value replicated into 8 halfword lanes



    .global ih264_memset_16bit_av8

ih264_memset_16bit_av8:
    subs      x2, x2, #8                  // x2 = num_words - 8
    blt       arm_memset_16bit            // fewer than 8 words: tail only
    dup       v0.8h, w1
loop_neon_memset_16bit:
    // Memset 8 words with one 128 bit store
    st1       {v0.8h}, [x0], #16

    subs      x2, x2, #8
    bge       loop_neon_memset_16bit      // at least one more full group

arm_memset_16bit:
    // Remove the -8 bias: x2 = words left over (0..7). Checking for zero
    // here covers both "count was a multiple of 8" and "count was 0";
    // entering the word loop with 0 would wrap the counter.
    adds      x2, x2, #8
    beq       end_func3

loop_arm_memset_16bit:
    strh      w1, [x0], #2
    subs      x2, x2, #1
    bne       loop_arm_memset_16bit

end_func3:
    ret
2728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
275