///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
//*******************************************************************************
//*
//* //brief
//*     interprediction luma function for copy
//*
//* //par description:
//*     copies the array of width 'wd' and height 'ht' from the location pointed
//*     by 'src' to the location pointed by 'dst'
//*
//*     The copy is done in bands of four rows.  Inside a band the columns are
//*     walked in chunks of 16, 8 or 4 bytes, chosen once from the low bits of
//*     'wd' (multiple of 16 -> 16, else multiple of 8 -> 8, else 4).
//*
//* //param[in] pu1_src
//*  uword8 pointer to the source
//*
//* //param[out] pu1_dst
//*  uword8 pointer to the destination
//*
//* //param[in] src_strd
//*  integer source stride
//*
//* //param[in] dst_strd
//*  integer destination stride
//*
//* //param[in] pi1_coeff
//*  word8 pointer to the filter coefficients (x4 is never read by this routine)
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array
//*
//* //returns
//*  nothing; returns immediately when ht <= 0 or wd <= 0
//*
//* //remarks
//*  - Rows are always processed four at a time, so a height that is not a
//*    multiple of 4 is effectively rounded up (extra rows are read/written).
//*  - NOTE(review): the 4-byte path rewinds the row pointers by (wd - 4), which
//*    is only exact when wd is a multiple of 4 - presumably guaranteed by the
//*    caller; confirm.
//*
//*******************************************************************************
//*/
//void ihevc_inter_pred_luma_copy (
//                            uword8 *pu1_src,
//                            uword8 *pu1_dst,
//                            word32 src_strd,
//                            word32 dst_strd,
//                            word8 *pi1_coeff,
//                            word32 ht,
//                            word32 wd   )

//**************variables vs registers*****************************************
//    x0  => *pu1_src   (walks along the current band, then steps down 4 rows)
//    x1  => *pu1_dst   (same walk as x0)
//    x2  => src_strd   (sign-extended from w2)
//    x3  => dst_strd   (sign-extended from w3)
//    x8  => columns still to copy in the current band
//    x9  => source pointer for rows 1..3 of the current chunk
//    x10 => destination pointer for rows 1..3 of the current chunk
//    x11 => ht         (sign-extended from w5), rows still to copy
//    x15 => wd - chunk width, used to rewind to the start of the next band
//    x16 => wd         (sign-extended from w6)
//    v0-v3 => data in flight

.text
.align 4

.include "ihevc_neon_macros.s"

.globl ihevc_inter_pred_luma_copy_av8

.type ihevc_inter_pred_luma_copy_av8, %function

ihevc_inter_pred_luma_copy_av8:
    // The four integer arguments are 32-bit; the upper halves of their
    // registers are unspecified on entry, so widen them before any 64-bit use.
    sxtw        x2,w2                       //src_strd
    sxtw        x3,w3                       //dst_strd
    sxtw        x11,w5                      //loads ht
    sxtw        x16,w6                      //loads wd

    cmp         x11,#0                      //nothing to do when ht <= 0
    ble         end_loops
    cmp         x16,#0                      //nothing to do when wd <= 0
    ble         end_loops

    tst         x16,#15                     //wd a multiple of 16?
    beq         core_loop_wd_16
    tst         x16,#7                      //wd a multiple of 8?
    beq         core_loop_wd_8
    sub         x15,x16,#4                  //rewind offset for the 4-byte path

outer_loop_wd_4:
    mov         x8,x16                      //columns left in this band (wd > 0 here)

inner_loop_wd_4:
    ld1         {v0.s}[0],[x0]              //row 0: load 4 bytes
    add         x9,x0,x2                    //x9  = source row 1 of this chunk
    add         x10,x1,x3                   //x10 = destination row 1 of this chunk
    st1         {v0.s}[0],[x1]              //row 0: store 4 bytes
    ld1         {v0.s}[0],[x9],x2           //row 1: load, step to row 2
    add         x0,x0,#4                    //pu1_src += 4
    st1         {v0.s}[0],[x10],x3          //row 1: store, step to row 2
    ld1         {v0.s}[0],[x9],x2           //row 2: load, step to row 3
    subs        x8,x8,#4                    //wd - 4 (flags consumed by bgt below)
    st1         {v0.s}[0],[x10],x3          //row 2: store, step to row 3
    ld1         {v0.s}[0],[x9],x2           //row 3: load, step to row 4
    add         x1,x1,#4                    //pu1_dst += 4
    st1         {v0.s}[0],[x10],x3          //row 3: store, step to row 4
    bgt         inner_loop_wd_4

    subs        x11,x11,#4                  //ht -= 4
    sub         x0,x9,x15                   //pu1_src = start of the next band
    sub         x1,x10,x15                  //pu1_dst = start of the next band
    bgt         outer_loop_wd_4

end_loops:
    ret


core_loop_wd_8:
    sub         x15,x16,#8                  //rewind offset for the 8-byte path

outer_loop_wd_8:
    mov         x8,x16                      //columns left in this band (wd > 0 here)

inner_loop_wd_8:
    add         x9,x0,x2                    //x9  = source row 1 of this chunk
    ld1         {v0.8b},[x0],#8             //row 0: load 8 bytes, pu1_src += 8
    add         x10,x1,x3                   //x10 = destination row 1 of this chunk
    st1         {v0.8b},[x1],#8             //row 0: store 8 bytes, pu1_dst += 8
    ld1         {v1.8b},[x9],x2             //row 1: load, step to row 2
    st1         {v1.8b},[x10],x3            //row 1: store, step to row 2
    subs        x8,x8,#8                    //wd - 8 (flags consumed by bgt below)
    ld1         {v2.8b},[x9],x2             //row 2: load, step to row 3
    st1         {v2.8b},[x10],x3            //row 2: store, step to row 3
    ld1         {v3.8b},[x9],x2             //row 3: load, step to row 4
    st1         {v3.8b},[x10],x3            //row 3: store, step to row 4
    bgt         inner_loop_wd_8

    subs        x11,x11,#4                  //ht -= 4
    sub         x0,x9,x15                   //pu1_src = start of the next band
    sub         x1,x10,x15                  //pu1_dst = start of the next band
    bgt         outer_loop_wd_8

    ret


core_loop_wd_16:
    sub         x15,x16,#16                 //rewind offset for the 16-byte path

outer_loop_wd_16:
    mov         x8,x16                      //columns left in this band (wd > 0 here)

inner_loop_wd_16:
    add         x9,x0,x2                    //x9  = source row 1 of this chunk
    ld1         {v0.16b},[x0],#16           //row 0: load 16 bytes, pu1_src += 16
    add         x10,x1,x3                   //x10 = destination row 1 of this chunk
    st1         {v0.16b},[x1],#16           //row 0: store 16 bytes, pu1_dst += 16
    ld1         {v1.16b},[x9],x2            //row 1: load, step to row 2
    st1         {v1.16b},[x10],x3           //row 1: store, step to row 2
    subs        x8,x8,#16                   //wd - 16 (flags consumed by bgt below)
    ld1         {v2.16b},[x9],x2            //row 2: load, step to row 3
    st1         {v2.16b},[x10],x3           //row 2: store, step to row 3
    ld1         {v3.16b},[x9],x2            //row 3: load, step to row 4
    st1         {v3.16b},[x10],x3           //row 3: store, step to row 4
    bgt         inner_loop_wd_16

    subs        x11,x11,#4                  //ht -= 4
    sub         x0,x9,x15                   //pu1_src = start of the next band
    sub         x1,x10,x15                  //pu1_dst = start of the next band
    bgt         outer_loop_wd_16

    ret