//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
///**
//*******************************************************************************
//*
//* @brief
//*     Inter prediction luma function for copy
//*
//* @par Description:
//*   Copies the array of width 'wd' and height 'ht' from the location pointed
//*   by 'src' to the location pointed by 'dst'
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//*
//* @param[in] ht
//*  integer height of the array
//*
//* @param[in] wd
//*  integer width of the array
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_inter_pred_luma_copy (
//                            UWORD8 *pu1_src,
//                            UWORD8 *pu1_dst,
//                            WORD32 src_strd,
//                            WORD32 dst_strd,
//                            WORD32 ht,
//                            WORD32 wd   )

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 =>  src_strd
//    x3 =>  dst_strd
//    x7 =>  ht   (passed in x4, copied to x7 on entry)
//    x12 => wd   (passed in x5, copied to x12 on entry)
.text
.p2align 2
.include "ih264_neon_macros.s"



    .global ih264_inter_pred_luma_copy_av8

//------------------------------------------------------------------------------
// void ih264_inter_pred_luma_copy_av8(UWORD8 *pu1_src, UWORD8 *pu1_dst,
//                                     WORD32 src_strd, WORD32 dst_strd,
//                                     WORD32 ht, WORD32 wd)
//
// Copies a wd x ht block of bytes from pu1_src to pu1_dst.
//
// In:    x0 = pu1_src, x1 = pu1_dst, w2 = src_strd, w3 = dst_strd,
//        w4 = ht, w5 = wd
// Work:  x4  = columns left in the current band of rows
//        x5  = source row pointer,  x6 = destination row pointer
//        x7  = rows left,           x11 = wd - chunk width (row rewind)
//        x12 = wd
//        v0-v6 = data in flight
//
// Rows are always handled four at a time, so the number of rows written is
// ht rounded up to a multiple of 4. The column chunk is 16 bytes when wd is a
// multiple of 16, 8 bytes when it is a multiple of 8, and 4 bytes otherwise;
// the pointer rewind at the end of each band is only exact when wd is a
// multiple of 4.
//
// NOTE(review): x19/x20 are saved and restored but never used here, and only
// v0-v6 are touched; push_v_regs/pop_v_regs come from ih264_neon_macros.s and
// are kept as in the original.
//------------------------------------------------------------------------------
ih264_inter_pred_luma_copy_av8:

    push_v_regs
    stp       x19, x20, [sp, #-16]!

    // The WORD32 arguments arrive in w2-w5. AAPCS64 leaves bits [63:32] of a
    // 32-bit argument unspecified, so sign-extend them before they are used
    // as 64-bit offsets and loop counters.
    sxtw      x2, w2                    //src_strd
    sxtw      x3, w3                    //dst_strd
    sxtw      x4, w4                    //ht
    sxtw      x5, w5                    //wd

    mov       x12, x5                   //x12 = wd
    mov       x7, x4                    //x7  = ht
    cmp       x7, #0                    //nothing to do when ht <= 0
    ble       end_loops
    cmp       x12, #0                   //nothing to do when wd <= 0
    ble       end_loops
    tst       x12, #15                  //wd multiple of 16 -> 16-byte chunks
    beq       core_loop_wd_16
    tst       x12, #7                   //wd multiple of 8  -> 8-byte chunks
    beq       core_loop_wd_8
    sub       x11, x12, #4              //row rewind for 4-byte chunks

outer_loop_wd_4:
    subs      x4, x12, #0               //x4 = wd, columns left in this band
    ble       end_inner_loop_wd_4

inner_loop_wd_4:
    // Copy one 4-byte wide, 4-row tall tile. x0/x1 walk along row 0 of the
    // band, x5/x6 walk down rows 1..3 of the current tile.
    ld1       {v0.s}[0], [x0]           //row 0 load
    add       x5, x0, x2                //pu1_src_tmp = pu1_src + src_strd
    add       x6, x1, x3                //pu1_dst_tmp = pu1_dst + dst_strd
    st1       {v0.s}[0], [x1]           //row 0 store
    ld1       {v0.s}[0], [x5], x2       //row 1 load
    add       x0, x0, #4                //pu1_src += 4
    st1       {v0.s}[0], [x6], x3       //row 1 store
    ld1       {v0.s}[0], [x5], x2       //row 2 load
    subs      x4, x4, #4                //columns left -= 4 (flags used by bgt)
    st1       {v0.s}[0], [x6], x3       //row 2 store
    ld1       {v0.s}[0], [x5], x2       //row 3 load
    add       x1, x1, #4                //pu1_dst += 4
    st1       {v0.s}[0], [x6], x3       //row 3 store

    bgt       inner_loop_wd_4

end_inner_loop_wd_4:
    // x5/x6 now point 4 rows below the last tile; step back (wd - 4) bytes to
    // reach the start of the next band of rows.
    subs      x7, x7, #4                //ht -= 4
    sub       x0, x5, x11               //pu1_src = start of next band
    sub       x1, x6, x11               //pu1_dst = start of next band
    bgt       outer_loop_wd_4

end_loops:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret


core_loop_wd_8:
    sub       x11, x12, #8              //row rewind for 8-byte chunks

outer_loop_wd_8:
    subs      x4, x12, #0               //x4 = wd, columns left in this band
    ble       end_inner_loop_wd_8

inner_loop_wd_8:
    add       x5, x0, x2                //pu1_src_tmp = pu1_src + src_strd
    ld1       {v0.8b}, [x0], #8         //row 0 load,  pu1_src += 8
    add       x6, x1, x3                //pu1_dst_tmp = pu1_dst + dst_strd
    st1       {v0.8b}, [x1], #8         //row 0 store, pu1_dst += 8
    ld1       {v1.8b}, [x5], x2         //row 1 load
    st1       {v1.8b}, [x6], x3         //row 1 store
    subs      x4, x4, #8                //columns left -= 8 (flags used by bgt)
    ld1       {v2.8b}, [x5], x2         //row 2 load
    st1       {v2.8b}, [x6], x3         //row 2 store
    ld1       {v3.8b}, [x5], x2         //row 3 load
    st1       {v3.8b}, [x6], x3         //row 3 store
    bgt       inner_loop_wd_8

end_inner_loop_wd_8:
    subs      x7, x7, #4                //ht -= 4
    sub       x0, x5, x11               //pu1_src = start of next band
    sub       x1, x6, x11               //pu1_dst = start of next band
    bgt       outer_loop_wd_8

    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret

core_loop_wd_16:
    sub       x11, x12, #16             //row rewind for 16-byte chunks

outer_loop_wd_16:
    subs      x4, x12, #0               //x4 = wd, columns left in this band
    ble       end_inner_loop_wd_16

inner_loop_wd_16:
    add       x5, x0, x2                //pu1_src_tmp = pu1_src + src_strd
    ld1       {v0.16b}, [x0], #16       //row 0 load,  pu1_src += 16
    add       x6, x1, x3                //pu1_dst_tmp = pu1_dst + dst_strd
    st1       {v0.16b}, [x1], #16       //row 0 store, pu1_dst += 16
    ld1       {v2.16b}, [x5], x2        //row 1 load
    st1       {v2.16b}, [x6], x3        //row 1 store
    subs      x4, x4, #16               //columns left -= 16 (flags used by bgt)
    ld1       {v4.16b}, [x5], x2        //row 2 load
    st1       {v4.16b}, [x6], x3        //row 2 store
    ld1       {v6.16b}, [x5], x2        //row 3 load
    st1       {v6.16b}, [x6], x3        //row 3 store
    bgt       inner_loop_wd_16

end_inner_loop_wd_16:
    subs      x7, x7, #4                //ht -= 4
    sub       x0, x5, x11               //pu1_src = start of next band
    sub       x1, x6, x11               //pu1_dst = start of next band
    bgt       outer_loop_wd_16


    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret


// /*
// ********************************************************************************
// *
// * @brief This function copies a 4x4 block to destination
// *
// * @par Description:
// * Copies a 4x4 block to destination, where both src and dst are interleaved
// *
// * @param[in] pi2_src
// *  Source
// *
// * @param[in] pu1_out
// *  Output pointer
// *
// * @param[in] pred_strd,
// *  Prediction buffer stride
// *
// * @param[in] out_strd
// *  output buffer Stride
// *
// * @returns none
// *
// * @remarks none
// * Currently wd and height is not used, ie a 4x4 block is always copied
// *
// *******************************************************************************
// */
// void ih264_interleave_copy(WORD16 *pi2_src,
//                            UWORD8 *pu1_out,
//                            WORD32 pred_strd,
//                            WORD32 out_strd,
//                            WORD32 wd,
//                            WORD32 ht)
// Register Usage
// x0 : pi2_src  (reused as the store pointer once the source is loaded)
// x1 : pu1_out  (load pointer for the existing output rows)
// x2 : src_strd (pred_strd, passed in w2)
// x3 : out_strd (passed in w3)
// Neon registers v2-v5, v18-v21 and v30 are used
//
// Summary of the routine below: four rows of 8 bytes are read from the source
// and from the output. In every 16-bit lane the low (even-addressed) byte of
// the output is replaced by the corresponding source byte while the high
// (odd-addressed) byte of the output is kept, and the four rows are written
// back to the output.
//
// NOTE(review): only the registers listed above are touched here;
// push_v_regs/pop_v_regs come from ih264_neon_macros.s and are kept as in the
// original.

    .global ih264_interleave_copy_av8
ih264_interleave_copy_av8:
    push_v_regs

    // The WORD32 strides arrive in w2/w3. AAPCS64 leaves bits [63:32] of a
    // 32-bit argument unspecified, so sign-extend them before they are used
    // as 64-bit post-index offsets.
    sxtw      x2, w2                    //src_strd
    sxtw      x3, w3                    //out_strd

    ld1       {v2.8b}, [x0], x2         //source row 0
    ld1       {v3.8b}, [x0], x2         //source row 1
    mov       v2.d[1], v3.d[0]          //v2 = source rows 0 | 1
    ld1       {v4.8b}, [x0], x2         //source row 2
    ld1       {v5.8b}, [x0], x2         //source row 3
    mov       v4.d[1], v5.d[0]          //v4 = source rows 2 | 3

    mov       x0, x1                    //x0 = pu1_out, used for the stores

    ld1       {v18.8b}, [x1], x3        //output row 0 (8 bytes)
    ld1       {v19.8b}, [x1], x3        //output row 1
    mov       v18.d[1], v19.d[0]        //v18 = output rows 0 | 1
    movi      v30.8h, #0x00ff           //mask: low byte of every 16-bit lane
    ld1       {v20.8b}, [x1], x3        //output row 2
    ld1       {v21.8b}, [x1], x3        //output row 3
    mov       v20.d[1], v21.d[0]        //v20 = output rows 2 | 3

    // Insert the source bits selected by the mask into the output rows; bits
    // outside the mask keep their current output value.
    bit       v18.16b, v2.16b , v30.16b
    bit       v20.16b, v4.16b , v30.16b

    st1       {v18.8b}, [x0], x3        //store output row 0
    st1       {v18.d}[1], [x0], x3      //store output row 1
    st1       {v20.8b}, [x0], x3        //store output row 2
    st1       {v20.d}[1], [x0], x3      //store output row 3

    pop_v_regs
    ret

