10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///***************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* limitations under the License. 
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*****************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//****************************************************************************** 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* ihevc_inter_pred_filters_luma_vert_w16inp.s 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* contains function definitions for inter prediction interpolation. 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* functions are coded in neon assembly and can be compiled using 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* rvct 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //author 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* yogeswaran rs 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //par list of functions: 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* - ihevc_inter_pred_luma_vert_w16inp_w16out_av8() 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //remarks 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* none 380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar//******************************************************************************* 400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*/ 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///* all the functions here are replicated from ihevc_inter_pred_filters.c and modified to */ 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///* include reconstruction */ 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///** 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//******************************************************************************* 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //brief 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* luma vertical filter for 16bit input and 16bit output. 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //par description: 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* applies a vertical filter with coefficients pointed to by 'pi1_coeff' to 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* the elements pointed by 'pi2_src' and writes to the location pointed by 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 'pu1_dst'. input is 16 bits. an offset of 0x80000 is subtracted from the 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* filter output, which is then downshifted by 6 and stored as 16 bits 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* without clipping. assumptions : the function is optimized considering 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* the fact that width is a multiple of 4 and height a multiple of 2. 
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //param[in] pi2_src 610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* word16 pointer to the source 620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //param[out] pu1_dst 640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* pointer to the destination; 16-bit filter results are written here 650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //param[in] src_strd 670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* integer source stride, in 16-bit elements 680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //param[in] dst_strd 700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* integer destination stride, in 16-bit elements 710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //param[in] pi1_coeff 730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* word8 pointer to the filter coefficients 740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //param[in] ht 760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* integer height of the array 770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //param[in] wd 790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* integer width of the array 800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //returns 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* //remarks 
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* none 850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//******************************************************************************* 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*/ 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//void ihevc_inter_pred_luma_vert_w16inp(word16 *pi2_src, 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// uword8 *pu1_dst, 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// word32 src_strd, 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// word32 dst_strd, 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// word8 *pi1_coeff, 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// word32 ht, 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// word32 wd ) 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//**************variables vs registers***************************************** 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// r0 => *pu2_src 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// r1 => *pu1_dst 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// r2 => src_strd 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// r3 => dst_strd 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// r4 => *pi1_coeff 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// r5 => ht 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// r6 => wd 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.align 4 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.include 
"ihevc_neon_macros.s" 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_inter_pred_luma_vert_w16inp_w16out_av8 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.type ihevc_inter_pred_luma_vert_w16inp_w16out_av8, %function 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_inter_pred_luma_vert_w16inp_w16out_av8: 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //stmfd sp!, {r4-r12, r14} //stack stores the values of the arguments 1179cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar stp x19,x20,[sp, #-16]! 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x15,x4 // pi1_coeff 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x16,x5 // ht 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x17,x6 // wd 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x12,x15 //load pi1_coeff 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar lsl x6,x3,#1 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x5,x17 //load wd 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v0.8b},[x12] //coeff = ld1_s8(pi1_coeff) 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar lsl x2, x2,#1 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub x12,x2,x2,lsl #2 //src_ctrd & pi1_coeff 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vabs.s8 d0,d0 //vabs_s8(coeff) 
1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x0,x0,x12 //r0->pu1_src r12->pi1_coeff 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x3,x16 //load ht 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs x7,x3,#0 //r3->ht 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //ble end_loops //end loop jump 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sxtl v0.8h,v0.8b 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v22.4h,v0.h[0] //coeffabs_0 = vdup_lane_u8(coeffabs, 0)// 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v23.4h,v0.h[1] //coeffabs_1 = vdup_lane_u8(coeffabs, 1)// 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v24.4h,v0.h[2] //coeffabs_2 = vdup_lane_u8(coeffabs, 2)// 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v25.4h,v0.h[3] //coeffabs_3 = vdup_lane_u8(coeffabs, 3)// 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v26.4h,v0.h[4] //coeffabs_4 = vdup_lane_u8(coeffabs, 4)// 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v27.4h,v0.h[5] //coeffabs_5 = vdup_lane_u8(coeffabs, 5)// 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v28.4h,v0.h[6] //coeffabs_6 = vdup_lane_u8(coeffabs, 6)// 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v29.4h,v0.h[7] //coeffabs_7 = vdup_lane_u8(coeffabs, 7)// 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar movi v30.4s,#8, lsl #16 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub x9,x5,x6,lsl #2 //r6->dst_strd r5 ->wd 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar neg x9,x9 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub x8,x5,x2,lsl #2 //r2->src_strd 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar neg x8,x8 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub 
x8,x8,x5 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub x9,x9,x5 1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar lsr x3, x5, #2 //divide by 4 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mul x7, x7, x3 //multiply height by width 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub x7, x7, #4 //subtract by one for epilog 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x4,x5 //r5 ->wd 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //mov r2, r2, lsl #1 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarprolog: 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x3,x0,x2 //pu1_src_tmp += src_strd// 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v1.4h},[x3],x2 //src_tmp2 = ld1_u8(pu1_src_tmp)// 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v0.4h},[x0], #8 //src_tmp1 = ld1_u8(pu1_src_tmp)// 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs x4,x4,#4 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v2.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 1669cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v19.4s,v1.4h,v23.4h //mul_res1 = smull_u8(src_tmp2, coeffabs_1)// 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v3.4h},[x3],x2 //src_tmp4 = ld1_u8(pu1_src_tmp)// 1689cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v0.4h,v22.4h //mul_res1 = smlal_u8(mul_res1, src_tmp1, coeffabs_0)// 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v4.4h},[x3],x2 //src_tmp1 = ld1_u8(pu1_src_tmp)// 1709cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v2.4h,v24.4h //mul_res1 = smlal_u8(mul_res1, src_tmp3, coeffabs_2)// 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar ld1 {v5.4h},[x3],x2 //src_tmp2 = ld1_u8(pu1_src_tmp)// 1729cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v3.4h,v25.4h //mul_res1 = smlal_u8(mul_res1, src_tmp4, coeffabs_3)// 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v6.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 1749cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v4.4h,v26.4h //mul_res1 = smlal_u8(mul_res1, src_tmp1, coeffabs_4)// 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v7.4h},[x3],x2 //src_tmp4 = ld1_u8(pu1_src_tmp)// 1769cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v5.4h,v27.4h //mul_res1 = smlal_u8(mul_res1, src_tmp2, coeffabs_5)// 1779cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v6.4h,v28.4h //mul_res1 = smlal_u8(mul_res1, src_tmp3, coeffabs_6)// 1789cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v7.4h,v29.4h //mul_res1 = smlal_u8(mul_res1, src_tmp4, coeffabs_7)// 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v16.4h},[x3],x2 //src_tmp1 = ld1_u8(pu1_src_tmp)// 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1829cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v20.4s,v2.4h,v23.4h //mul_res2 = smull_u8(src_tmp3, coeffabs_1)// 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x20,x0,x8,lsl #0 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar csel x0,x20,x0,le 1859cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v1.4h,v22.4h //mul_res2 = smlal_u8(mul_res2, src_tmp2, coeffabs_0)// 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar csel x4,x5,x4,le 1879cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v3.4h,v24.4h //mul_res2 = smlal_u8(mul_res2, src_tmp4, coeffabs_2)// 
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v17.4h},[x3],x2 //src_tmp2 = ld1_u8(pu1_src_tmp)// 1899cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v4.4h,v25.4h //mul_res2 = smlal_u8(mul_res2, src_tmp1, coeffabs_3)// 1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v18.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 1919cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v5.4h,v26.4h //mul_res2 = smlal_u8(mul_res2, src_tmp2, coeffabs_4)// 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x3,x0,x2 //pu1_src_tmp += src_strd// 1939cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v6.4h,v27.4h //mul_res2 = smlal_u8(mul_res2, src_tmp3, coeffabs_5)// 1949cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v7.4h,v28.4h //mul_res2 = smlal_u8(mul_res2, src_tmp4, coeffabs_6)// 1959cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v16.4h,v29.4h //mul_res2 = smlal_u8(mul_res2, src_tmp1, coeffabs_7)// 1969cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v19.4s, v19.4s, v30.4s 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v1.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 1999cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v21.4s,v3.4h,v23.4h 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v0.4h},[x0],#8 //src_tmp1 = ld1_u8(pu1_src_tmp)// 2019cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v2.4h,v22.4h 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v2.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 2039cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v4.4h,v24.4h 2049cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v5.4h,v25.4h 
2059cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v6.4h,v26.4h 2069cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v7.4h,v27.4h 2079cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v16.4h,v28.4h 2089cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v17.4h,v29.4h 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x14,x1,x6 2109cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v20.4s, v20.4s, v30.4s 2119cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v19.4h, v19.4s, #6 2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d8,q4,#6 //sto_res = vqmovun_s16(sto_res_tmp)// 2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2149cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v31.4s,v4.4h,v23.4h 2159cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v3.4h,v22.4h 2169cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v5.4h,v24.4h 2179cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v6.4h,v25.4h 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v3.4h},[x3],x2 //src_tmp4 = ld1_u8(pu1_src_tmp)// 2199cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v7.4h,v26.4h 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v4.4h},[x3],x2 //src_tmp1 = ld1_u8(pu1_src_tmp)// 2219cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v16.4h,v27.4h 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v5.4h},[x3],x2 //src_tmp2 = ld1_u8(pu1_src_tmp)// 2239cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v17.4h,v28.4h 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v6.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 2259cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal 
v31.4s,v18.4h,v29.4h 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v7.4h},[x3],x2 //src_tmp4 = ld1_u8(pu1_src_tmp)// 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2289cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v19.2s},[x1],#8 //st1_u8(pu1_dst,sto_res)// 2299cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v21.4s, v21.4s, v30.4s 2309cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v20.4h, v20.4s, #6 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d10,q5,#6 //sto_res = vqmovun_s16(sto_res_tmp)// 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x20, x1, x9 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar csel x1, x20, x1, le 2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs x7,x7,#4 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar blt epilog_end //jumps to epilog_end 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar beq epilog //jumps to epilog 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarkernel_8: 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2439cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v19.4s,v1.4h,v23.4h //mul_res1 = smull_u8(src_tmp2, coeffabs_1)// 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs x4,x4,#4 2459cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v0.4h,v22.4h //mul_res1 = smlal_u8(mul_res1, src_tmp1, coeffabs_0)// 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x20,x0,x8,lsl #0 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar csel x0,x20,x0,le 
2489cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v2.4h,v24.4h //mul_res1 = smlal_u8(mul_res1, src_tmp3, coeffabs_2)// 2499cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v3.4h,v25.4h //mul_res1 = smlal_u8(mul_res1, src_tmp4, coeffabs_3)// 2509cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v4.4h,v26.4h //mul_res1 = smlal_u8(mul_res1, src_tmp1, coeffabs_4)// 2519cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v5.4h,v27.4h //mul_res1 = smlal_u8(mul_res1, src_tmp2, coeffabs_5)// 2529cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v6.4h,v28.4h //mul_res1 = smlal_u8(mul_res1, src_tmp3, coeffabs_6)// 2539cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v7.4h,v29.4h //mul_res1 = smlal_u8(mul_res1, src_tmp4, coeffabs_7)// 2549cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v20.2s},[x14],x6 //st1_u8(pu1_dst_tmp,sto_res)// 2559cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy 2569cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v31.4S, v31.4s, v30.4s 2579cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v21.4h, v21.4s, #6 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d12,q6,#6 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v16.4h},[x3],x2 //src_tmp1 = ld1_u8(pu1_src_tmp)// 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2619cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v20.4s,v2.4h,v23.4h //mul_res2 = smull_u8(src_tmp3, coeffabs_1)// 2629cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v1.4h,v22.4h //mul_res2 = smlal_u8(mul_res2, src_tmp2, coeffabs_0)// 2639cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v3.4h,v24.4h //mul_res2 = smlal_u8(mul_res2, src_tmp4, coeffabs_2)// 
2649cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v4.4h,v25.4h //mul_res2 = smlal_u8(mul_res2, src_tmp1, coeffabs_3)// 2659cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v5.4h,v26.4h //mul_res2 = smlal_u8(mul_res2, src_tmp2, coeffabs_4)// 2669cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v6.4h,v27.4h //mul_res2 = smlal_u8(mul_res2, src_tmp3, coeffabs_5)// 2679cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v21.2s},[x14],x6 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2699cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v7.4h,v28.4h //mul_res2 = smlal_u8(mul_res2, src_tmp4, coeffabs_6)// 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v17.4h},[x3],x2 //src_tmp2 = ld1_u8(pu1_src_tmp)// 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2729cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v16.4h,v29.4h //mul_res2 = smlal_u8(mul_res2, src_tmp1, coeffabs_7)// 2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2749cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v19.4s, v19.4s, v30.4s 2759cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v31.4h, v31.4s, #6 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d14,q7,#6 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2789cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v21.4s,v3.4h,v23.4h 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar csel x4,x5,x4,le 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2819cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v2.4h,v22.4h 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v18.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
2849cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v4.4h,v24.4h 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x3,x0,x2 //pu1_src_tmp += src_strd// 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2879cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v5.4h,v25.4h 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2899cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v6.4h,v26.4h 2909cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v31.2s},[x14],x6 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2929cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v7.4h,v27.4h 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v1.4h},[x3],x2 //src_tmp2 = ld1_u8(pu1_src_tmp)// 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2959cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v16.4h,v28.4h 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x14,x1,x6 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2989cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v17.4h,v29.4h 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v0.4h},[x0],#8 //src_tmp1 = ld1_u8(pu1_src_tmp)// 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3019cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v20.4s, v20.4s, v30.4s 3029cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v19.4h, v19.4s, #6 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d8,q4,#6 //sto_res = vqmovun_s16(sto_res_tmp)// 3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v2.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3069cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull 
v31.4s,v4.4h,v23.4h 3079cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v3.4h,v22.4h 3089cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v5.4h,v24.4h 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v3.4h},[x3],x2 //src_tmp4 = ld1_u8(pu1_src_tmp)// 3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3119cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v6.4h,v25.4h 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v4.4h},[x3],x2 //src_tmp1 = ld1_u8(pu1_src_tmp)// 3139cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v7.4h,v26.4h 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v5.4h},[x3],x2 //src_tmp2 = ld1_u8(pu1_src_tmp)// 3159cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v16.4h,v27.4h 3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v6.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 3179cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v17.4h,v28.4h 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v7.4h},[x3],x2 //src_tmp4 = ld1_u8(pu1_src_tmp)// 3199cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v18.4h,v29.4h 3209cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v19.2s},[x1],#8 //st1_u8(pu1_dst,sto_res)// 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3229cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v21.4s, v21.4s, v30.4s 3239cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v20.4h, v20.4s, #6 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x20, x1, x9 3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar csel x1, x20, x1, le 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d10,q5,#6 //sto_res = 
vqmovun_s16(sto_res_tmp)// 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs x7,x7,#4 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bgt kernel_8 //jumps to kernel_8 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepilog: 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3349cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v19.4s,v1.4h,v23.4h //mul_res1 = smull_u8(src_tmp2, coeffabs_1)// 3359cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v0.4h,v22.4h //mul_res1 = smlal_u8(mul_res1, src_tmp1, coeffabs_0)// 3369cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v2.4h,v24.4h //mul_res1 = smlal_u8(mul_res1, src_tmp3, coeffabs_2)// 3379cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v3.4h,v25.4h //mul_res1 = smlal_u8(mul_res1, src_tmp4, coeffabs_3)// 3389cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v4.4h,v26.4h //mul_res1 = smlal_u8(mul_res1, src_tmp1, coeffabs_4)// 3399cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v5.4h,v27.4h //mul_res1 = smlal_u8(mul_res1, src_tmp2, coeffabs_5)// 3409cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v6.4h,v28.4h //mul_res1 = smlal_u8(mul_res1, src_tmp3, coeffabs_6)// 3419cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v19.4s,v7.4h,v29.4h //mul_res1 = smlal_u8(mul_res1, src_tmp4, coeffabs_7)// 3429cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v20.2s},[x14],x6 3439cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy 3449cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v31.4s, v31.4s, v30.4s 3459cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v21.4h, v21.4s, #6 
3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d12,q6,#6 3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v16.4h},[x3],x2 //src_tmp1 = ld1_u8(pu1_src_tmp)// 3499cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v20.4s,v2.4h,v23.4h //mul_res2 = smull_u8(src_tmp3, coeffabs_1)// 3509cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v1.4h,v22.4h //mul_res2 = smlal_u8(mul_res2, src_tmp2, coeffabs_0)// 3519cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v3.4h,v24.4h //mul_res2 = smlal_u8(mul_res2, src_tmp4, coeffabs_2)// 3529cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v4.4h,v25.4h //mul_res2 = smlal_u8(mul_res2, src_tmp1, coeffabs_3)// 3539cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v5.4h,v26.4h //mul_res2 = smlal_u8(mul_res2, src_tmp2, coeffabs_4)// 3549cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v6.4h,v27.4h //mul_res2 = smlal_u8(mul_res2, src_tmp3, coeffabs_5)// 3559cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v7.4h,v28.4h //mul_res2 = smlal_u8(mul_res2, src_tmp4, coeffabs_6)// 3569cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v20.4s,v16.4h,v29.4h //mul_res2 = smlal_u8(mul_res2, src_tmp1, coeffabs_7)// 3579cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v21.2s},[x14],x6 3589cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy 3599cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v19.4s, v19.4s, v30.4s 3609cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v31.4h, v31.4s, #6 3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d14,q7,#6 3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 
{v17.4h},[x3],x2 //src_tmp2 = ld1_u8(pu1_src_tmp)// 3649cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v21.4s,v3.4h,v23.4h 3659cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v2.4h,v22.4h 3669cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v4.4h,v24.4h 3679cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v5.4h,v25.4h 3689cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v6.4h,v26.4h 3699cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v7.4h,v27.4h 3709cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v16.4h,v28.4h 3719cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v21.4s,v17.4h,v29.4h 3729cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v31.2s},[x14],x6 3739cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v20.4s, v20.4s, v30.4s 3749cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v19.4h, v19.4s, #6 3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d8,q4,#6 //sto_res = vqmovun_s16(sto_res_tmp)// 3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ld1 {v18.4h},[x3],x2 //src_tmp3 = ld1_u8(pu1_src_tmp)// 3789cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smull v31.4s,v4.4h,v23.4h 3799cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v3.4h,v22.4h 3809cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v5.4h,v24.4h 3819cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v6.4h,v25.4h 3829cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v7.4h,v26.4h 3839cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v16.4h,v27.4h 3849cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal 
v31.4s,v17.4h,v28.4h 3859cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy smlal v31.4s,v18.4h,v29.4h 3869cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v21.4s, v21.4s, v30.4s 3879cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v20.4h, v20.4s, #6 3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d10,q5,#6 //sto_res = vqmovun_s16(sto_res_tmp)// 3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add x14,x1,x6 3919cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v19.2s},[x1],#8 //st1_u8(pu1_dst,sto_res)// 3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepilog_end: 3949cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v20.2s},[x14],x6 //st1_u8(pu1_dst_tmp,sto_res)// 3959cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v21.4h, v21.4s, #6 3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d12,q6,#6 3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3989cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v21.2s},[x14],x6 3999cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy sub v31.4s, v31.4s, v30.4s 4009cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy shrn v31.4h, v31.4s, #6 4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //vqrshrun d14,q7,#6 4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4039cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy st1 {v31.2s},[x14],x6 4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_loops: 4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //ldmfd 
sp!,{r4-r12,r15} //reload the registers from sp 4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldp x19, x20,[sp], #16 4109cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy 4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ret 4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 419