10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/***************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* limitations under the License. 
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*****************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@****************************************************************************** 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ihevc_inter_pred_filters_luma_vert.s 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* contains function definitions for inter prediction interpolation. 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* functions are coded using neon intrinsics and can be compiled using 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* rvct 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @author 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* parthiban v 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par list of functions: 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* - ihevc_inter_pred_luma_vert() 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* none 380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar@******************************************************************************* 400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/ 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/* all the functions here are replicated from ihevc_inter_pred_filters.c and modified to */ 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/* include reconstruction */ 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/** 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* interprediction luma filter for vertical input 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par description: 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* applies a vertical filter with coefficients pointed to by 'pi1_coeff' to 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* the elements pointed by 'pu1_src' and writes to the location pointed by 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 'pu1_dst' the output is downshifted by 6 and clipped to 8 bits 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* assumptions : the function is optimized considering the fact width is 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* multiple of 4 or 8. and height as multiple of 2. 
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] pu1_src 610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* uword8 pointer to the source 620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[out] pu1_dst 640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* uword8 pointer to the destination 650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] src_strd 670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* integer source stride 680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] dst_strd 700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* integer destination stride 710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] pi1_coeff 730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* word8 pointer to the filter coefficients 740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] ht 760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* integer height of the array 770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] wd 790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* integer width of the array 800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @returns 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks 840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* none 
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/ 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@void ihevc_inter_pred_luma_vert ( 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ uword8 *pu1_src, 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ uword8 *pu1_dst, 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word32 src_strd, 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word32 dst_strd, 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word8 *pi1_coeff, 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word32 ht, 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word32 wd ) 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************variables vs registers***************************************** 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r0 => *pu1_src 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r1 => *pu1_dst 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r2 => src_strd 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r6 => dst_strd 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r12 => *pi1_coeff 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r5 => wd 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r3 => ht (copied to r7) 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.align 4 108d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer.syntax unified 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_inter_pred_luma_vert_a9q 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.type ihevc_inter_pred_luma_vert_a9q, %function 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_inter_pred_luma_vert_a9q: 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r12,[sp,#40] @load pi1_coeff 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r6,r3 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r5,[sp,#48] @load wd 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d0},[r12] @coeff = vld1_s8(pi1_coeff) 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub r12,r2,r2,lsl #2 @src_ctrd & pi1_coeff 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vabs.s8 d0,d0 @vabs_s8(coeff) 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r0,r0,r12 @r0->pu1_src r12->pi1_coeff 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r3,[sp,#44] @load ht 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r7,r3,#0 @r3->ht 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @ble end_loops @end loop jump 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d22,d0[0] @coeffabs_0 = vdup_lane_u8(coeffabs, 0)@ 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmp r5,#8 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 
d23,d0[1] @coeffabs_1 = vdup_lane_u8(coeffabs, 1)@ 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d24,d0[2] @coeffabs_2 = vdup_lane_u8(coeffabs, 2)@ 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d25,d0[3] @coeffabs_3 = vdup_lane_u8(coeffabs, 3)@ 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d26,d0[4] @coeffabs_4 = vdup_lane_u8(coeffabs, 4)@ 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d27,d0[5] @coeffabs_5 = vdup_lane_u8(coeffabs, 5)@ 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d28,d0[6] @coeffabs_6 = vdup_lane_u8(coeffabs, 6)@ 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d29,d0[7] @coeffabs_7 = vdup_lane_u8(coeffabs, 7)@ 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar blt core_loop_wd_4 @core loop wd 4 jump 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar str r0, [sp, #-4]! 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar str r1, [sp, #-4]! 
1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bic r4,r5,#7 @r5 ->wd 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rsb r9,r4,r6,lsl #2 @r6->dst_strd r5 ->wd 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rsb r8,r4,r2,lsl #2 @r2->src_strd 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r3, r5, lsr #3 @divide by 8 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mul r7, r3 @multiply height by width 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub r7, #4 @subtract by one for epilog 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarprolog: 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar and r10, r0, #31 1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r0,r2 @pu1_src_tmp += src_strd@ 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d1},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d0},[r0]! 
@src_tmp1 = vld1_u8(pu1_src_tmp)@ 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r4,r4,#8 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d2},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q4,d1,d23 @mul_res1 = vmull_u8(src_tmp2, coeffabs_1)@ 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d3},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d0,d22 @mul_res1 = vmlsl_u8(mul_res1, src_tmp1, coeffabs_0)@ 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d4},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d2,d24 @mul_res1 = vmlsl_u8(mul_res1, src_tmp3, coeffabs_2)@ 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d5},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d3,d25 @mul_res1 = vmlal_u8(mul_res1, src_tmp4, coeffabs_3)@ 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d6},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d4,d26 @mul_res1 = vmlal_u8(mul_res1, src_tmp1, coeffabs_4)@ 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d7},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d5,d27 @mul_res1 = vmlsl_u8(mul_res1, src_tmp2, coeffabs_5)@ 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d16},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d6,d28 @mul_res1 = vmlal_u8(mul_res1, src_tmp3, coeffabs_6)@ 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d17},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 
1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d7,d29 @mul_res1 = vmlsl_u8(mul_res1, src_tmp4, coeffabs_7)@ 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d18},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q5,d2,d23 @mul_res2 = vmull_u8(src_tmp3, coeffabs_1)@ 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar addle r0,r0,r8 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d1,d22 @mul_res2 = vmlsl_u8(mul_res2, src_tmp2, coeffabs_0)@ 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bicle r4,r5,#7 @r5 ->wd 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d3,d24 @mul_res2 = vmlsl_u8(mul_res2, src_tmp4, coeffabs_2)@ 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r3] 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d4,d25 @mul_res2 = vmlal_u8(mul_res2, src_tmp1, coeffabs_3)@ 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r3, r2] 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d5,d26 @mul_res2 = vmlal_u8(mul_res2, src_tmp2, coeffabs_4)@ 1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r3, r2, lsl #1] 1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d6,d27 @mul_res2 = vmlsl_u8(mul_res2, src_tmp3, coeffabs_5)@ 1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3, r3, r2 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d7,d28 @mul_res2 = 
vmlal_u8(mul_res2, src_tmp4, coeffabs_6)@ 1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r3, r2, lsl #1] 1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d16,d29 @mul_res2 = vmlsl_u8(mul_res2, src_tmp1, coeffabs_7)@ 1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r0,r2 @pu1_src_tmp += src_strd@ 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d8,q4,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d1},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q6,d3,d23 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d0},[r0]! @src_tmp1 = vld1_u8(pu1_src_tmp)@ 2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d2,d22 2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d2},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d4,d24 2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d5,d25 2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d6,d26 2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d7,d27 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d16,d28 2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d17,d29 2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r1,r6 2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d8},[r1]! 
@vst1_u8(pu1_dst,sto_res)@ 2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d10,q5,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar addle r1,r1,r9 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q7,d4,d23 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r7,r7,#4 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d3,d22 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d5,d24 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d6,d25 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d3},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d7,d26 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d4},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d16,d27 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d5},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d17,d28 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d6},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d18,d29 2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d7},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d10},[r14],r6 @vst1_u8(pu1_dst_tmp,sto_res)@ 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d12,q6,#6 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar blt epilog_end @jumps to epilog_end 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar beq epilog @jumps to epilog 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarkernel_8: 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r4,r4,#8 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q4,d1,d23 @mul_res1 = vmull_u8(src_tmp2, coeffabs_1)@ 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar addle r0,r0,r8 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d0,d22 @mul_res1 = vmlsl_u8(mul_res1, src_tmp1, coeffabs_0)@ 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bicle r4,r5,#7 @r5 ->wd 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d2,d24 @mul_res1 = vmlsl_u8(mul_res1, src_tmp3, coeffabs_2)@ 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d16},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d3,d25 @mul_res1 = vmlal_u8(mul_res1, src_tmp4, coeffabs_3)@ 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d17},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d4,d26 @mul_res1 = vmlal_u8(mul_res1, src_tmp1, coeffabs_4)@ 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d18},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 
2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d5,d27 @mul_res1 = vmlsl_u8(mul_res1, src_tmp2, coeffabs_5)@ 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d6,d28 @mul_res1 = vmlal_u8(mul_res1, src_tmp3, coeffabs_6)@ 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d7,d29 @mul_res1 = vmlsl_u8(mul_res1, src_tmp4, coeffabs_7)@ 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d12},[r14],r6 2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ and r11, r0, #31 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d14,q7,#6 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r0,r2 @pu1_src_tmp += src_strd@ 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q5,d2,d23 @mul_res2 = vmull_u8(src_tmp3, coeffabs_1)@ 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d0},[r0]! 
@src_tmp1 = vld1_u8(pu1_src_tmp)@ 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d1,d22 @mul_res2 = vmlsl_u8(mul_res2, src_tmp2, coeffabs_0)@ 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d3,d24 @mul_res2 = vmlsl_u8(mul_res2, src_tmp4, coeffabs_2)@ 2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d1},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d4,d25 @mul_res2 = vmlal_u8(mul_res2, src_tmp1, coeffabs_3)@ 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d14},[r14],r6 2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d5,d26 @mul_res2 = vmlal_u8(mul_res2, src_tmp2, coeffabs_4)@ 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r1,#0 2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d6,d27 @mul_res2 = vmlsl_u8(mul_res2, src_tmp3, coeffabs_5)@ 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r1, r1, #8 2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d7,d28 @mul_res2 = vmlal_u8(mul_res2, src_tmp4, coeffabs_6)@ 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d16,d29 @mul_res2 = vmlsl_u8(mul_res2, src_tmp1, coeffabs_7)@ 2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar addle r1,r1,r9 2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d8,q4,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 
2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ cmp r11, r10 2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q6,d3,d23 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r10, r3, r2, lsl #3 @ 10*strd - 8+2 2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d2,d22 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r10, r10, r2 @ 11*strd 2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d4,d24 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d2},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d5,d25 3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d6,d26 3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d8},[r14],r6 @vst1_u8(pu1_dst,sto_res)@ 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r10] @11+ 0 3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d7,d27 3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r10, r2] @11+ 1*strd 3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d16,d28 3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r10, r2, lsl #1] @11+ 2*strd 3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d17,d29 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r10, r10, r2 @12*strd 3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d10,q5,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r10, r2, lsl #1] @11+ 3*strd 3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q7,d4,d23 3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ mov r10, r11 3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d3,d22 3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r7,r7,#4 3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d5,d24 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d6,d25 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d3},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d7,d26 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d4},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d16,d27 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d5},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d17,d28 3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d6},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d18,d29 3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d7},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d12,q6,#6 3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d10},[r14],r6 @vst1_u8(pu1_dst_tmp,sto_res)@ 3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bgt kernel_8 @jumps to kernel_8 3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepilog: 3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q4,d1,d23 @mul_res1 = vmull_u8(src_tmp2, coeffabs_1)@ 3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d0,d22 @mul_res1 = vmlsl_u8(mul_res1, src_tmp1, coeffabs_0)@ 3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d2,d24 @mul_res1 = vmlsl_u8(mul_res1, src_tmp3, coeffabs_2)@ 3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d3,d25 @mul_res1 = vmlal_u8(mul_res1, src_tmp4, coeffabs_3)@ 3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d4,d26 @mul_res1 = vmlal_u8(mul_res1, src_tmp1, coeffabs_4)@ 3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d5,d27 @mul_res1 = vmlsl_u8(mul_res1, src_tmp2, coeffabs_5)@ 3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d6,d28 @mul_res1 = vmlal_u8(mul_res1, src_tmp3, coeffabs_6)@ 3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d7,d29 @mul_res1 = vmlsl_u8(mul_res1, src_tmp4, coeffabs_7)@ 3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d12},[r14],r6 3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d14,q7,#6 
3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d16},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q5,d2,d23 @mul_res2 = vmull_u8(src_tmp3, coeffabs_1)@ 3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d1,d22 @mul_res2 = vmlsl_u8(mul_res2, src_tmp2, coeffabs_0)@ 3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d3,d24 @mul_res2 = vmlsl_u8(mul_res2, src_tmp4, coeffabs_2)@ 3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d4,d25 @mul_res2 = vmlal_u8(mul_res2, src_tmp1, coeffabs_3)@ 3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d5,d26 @mul_res2 = vmlal_u8(mul_res2, src_tmp2, coeffabs_4)@ 3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d6,d27 @mul_res2 = vmlsl_u8(mul_res2, src_tmp3, coeffabs_5)@ 3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d7,d28 @mul_res2 = vmlal_u8(mul_res2, src_tmp4, coeffabs_6)@ 3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d16,d29 @mul_res2 = vmlsl_u8(mul_res2, src_tmp1, coeffabs_7)@ 3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d14},[r14],r6 3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d8,q4,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d17},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q6,d3,d23 3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d2,d22 3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d4,d24 3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar vmlal.u8 q6,d5,d25 3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d6,d26 3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d7,d27 3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d16,d28 3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d17,d29 3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r1,r6 3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d8},[r1]! @vst1_u8(pu1_dst,sto_res)@ 3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d10,q5,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d18},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q7,d4,d23 3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d3,d22 3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d5,d24 3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d6,d25 3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d7,d26 3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d16,d27 3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d17,d28 3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d18,d29 3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d10},[r14],r6 @vst1_u8(pu1_dst_tmp,sto_res)@ 3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d12,q6,#6 3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepilog_end: 3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d12},[r14],r6 
4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d14,q7,#6 4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d14},[r14],r6 4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_loops: 4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tst r5,#7 4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r1, [sp], #4 4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r0, [sp], #4 4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 410d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer ldmfdeq sp!,{r4-r12,r15} @reload the registers from sp 4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r5, #4 4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r0, r0, #8 4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r1, r1, #8 4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r7, #16 4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @ 4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_wd_4: 4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rsb r9,r5,r6,lsl #2 @r6->dst_strd r5 ->wd 4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rsb r8,r5,r2,lsl #2 @r2->src_strd 4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmov.i8 d4,#0 4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarouter_loop_wd_4: 4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r12,r5,#0 4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ble end_inner_loop_wd_4 @outer loop jump 4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarinner_loop_wd_4: 4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r0,r2 4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d4[1]},[r3],r2 @src_tmp1 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp1, 1)@ 4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r12,r12,#4 4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d5,d4[1] @src_tmp2 = vdup_lane_u32(src_tmp1, 1)@ 4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d5[1]},[r3],r2 @src_tmp2 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp2, 1)@ 4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d4[0]},[r0] @src_tmp1 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp1, 0)@ 4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q0,d5,d23 @mul_res1 = vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1)@ 4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d6,d5[1] @src_tmp3 = vdup_lane_u32(src_tmp2, 1)@ 4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r0,r0,#4 4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d6[1]},[r3],r2 @src_tmp3 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp3, 1)@ 4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q0,d4,d22 @mul_res1 = vmlsl_u8(mul_res1, vreinterpret_u8_u32(src_tmp1), coeffabs_0)@ 4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d7,d6[1] @src_tmp4 = vdup_lane_u32(src_tmp3, 1)@ 4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d7[1]},[r3],r2 @src_tmp4 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp4, 1)@ 4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q0,d6,d24 @mul_res1 = vmlsl_u8(mul_res1, 
vreinterpret_u8_u32(src_tmp3), coeffabs_2)@ 4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q4,d7,d23 4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d4,d7[1] @src_tmp1 = vdup_lane_u32(src_tmp4, 1)@ 4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q1,d7,d25 @mul_res2 = vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3)@ 4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d4[1]},[r3],r2 @src_tmp1 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp1, 1)@ 4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d6,d22 4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q0,d4,d26 @mul_res1 = vmlal_u8(mul_res1, vreinterpret_u8_u32(src_tmp1), coeffabs_4)@ 4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d5,d4[1] @src_tmp2 = vdup_lane_u32(src_tmp1, 1)@ 4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d4,d24 4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d5[1]},[r3],r2 @src_tmp2 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp2, 1)@ 4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q1,d5,d27 @mul_res2 = vmlsl_u8(mul_res2, vreinterpret_u8_u32(src_tmp2), coeffabs_5)@ 4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d6,d5[1] @src_tmp3 = vdup_lane_u32(src_tmp2, 1)@ 4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d5,d25 4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d6[1]},[r3],r2 @src_tmp3 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp3, 1)@ 4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q0,d6,d28 @mul_res1 = vmlal_u8(mul_res1, vreinterpret_u8_u32(src_tmp3), coeffabs_6)@ 
4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d7,d6[1] @src_tmp4 = vdup_lane_u32(src_tmp3, 1)@ 4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d6,d26 4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d7[1]},[r3],r2 @src_tmp4 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp4, 1)@ 4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q1,d7,d29 @mul_res2 = vmlsl_u8(mul_res2, vreinterpret_u8_u32(src_tmp4), coeffabs_7)@ 4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d4,d7[1] 4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vadd.i16 q0,q0,q1 @mul_res1 = vaddq_u16(mul_res1, mul_res2)@ 4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d7,d27 4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d4[1]},[r3],r2 4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d4,d28 4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d5,d4[1] 4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d0,q0,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d5[1]},[r3] 4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r1,r6 4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.32 {d0[0]},[r1] @vst1_lane_u32((uint32_t *)pu1_dst, vreinterpret_u32_u8(sto_res), 0)@ 4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d5,d29 4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.32 {d0[1]},[r3],r6 @vst1_lane_u32((uint32_t *)pu1_dst_tmp, 
vreinterpret_u32_u8(sto_res), 1)@ 4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vqrshrun.s16 d8,q4,#6 4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.32 {d8[0]},[r3],r6 4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r1,r1,#4 4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.32 {d8[1]},[r3] 4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bgt inner_loop_wd_4 4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_inner_loop_wd_4: 4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r7,r7,#4 4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r1,r1,r9 4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r0,r0,r8 4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bgt outer_loop_wd_4 4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldmfd sp!, {r4-r12, r15} @reload the registers from sp 4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/** 4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief 5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* interprediction luma filter for vertical 16bit output 5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par description: 5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 
applies a vertical filter with coefficients pointed to by 'pi1_coeff' to 5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* the elements pointed by 'pu1_src' and writes to the location pointed by 5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 'pi2_dst'. no downshifting or clipping is done and the output is used as 5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* an input for weighted prediction. assumptions : the function is optimized 5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* considering the fact that width is a multiple of 4 or 8 and height is a multiple 5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* of 2. 5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] pu1_src 5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* uword8 pointer to the source 5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[out] pi2_dst 5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* word16 pointer to the destination 5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] src_strd 5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* integer source stride 5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] dst_strd 5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* integer destination stride 5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] pi1_coeff 5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* word8 pointer to the filter coefficients 5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 
5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] ht 5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* integer height of the array 5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] wd 5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* integer width of the array 5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @returns 5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks 5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* none 5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/ 5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@void ihevc_inter_pred_luma_vert_w16out(uword8 *pu1_src, 5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word16 *pi2_dst, 5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word32 src_strd, 5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word32 dst_strd, 5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word8 *pi1_coeff, 5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word32 ht, 5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ word32 wd ) 5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************variables vs registers***************************************** 5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r0 => *pu1_src 5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar@ r1 => *pi2_dst 5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r2 => src_strd 5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r6 => dst_strd 5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r12 => *pi1_coeff 5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r5 => wd 5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ r3 => ht 5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_inter_pred_luma_vert_w16out_a9q 5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.type ihevc_inter_pred_luma_vert_w16out_a9q, %function 5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_inter_pred_luma_vert_w16out_a9q: 5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar stmfd sp!, {r4-r12, r14} @stack stores the values of the arguments 5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r12,[sp,#40] @load pi1_coeff 5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r6,r3 5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r5,[sp,#48] @load wd 5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d0},[r12] @coeff = vld1_s8(pi1_coeff) 5720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub r12,r2,r2,lsl #2 @src_ctrd & pi1_coeff 5730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vabs.s8 d0,d0 @vabs_s8(coeff) 5740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r0,r0,r12 @r0->pu1_src r12->pi1_coeff 
5750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r3,[sp,#44] @load ht 5760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r7,r3,#0 @r3->ht 5770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @ble end_loops_16out @end loop jump 5780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d22,d0[0] @coeffabs_0 = vdup_lane_u8(coeffabs, 0)@ 5790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmp r5,#8 5800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d23,d0[1] @coeffabs_1 = vdup_lane_u8(coeffabs, 1)@ 5810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d24,d0[2] @coeffabs_2 = vdup_lane_u8(coeffabs, 2)@ 5820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d25,d0[3] @coeffabs_3 = vdup_lane_u8(coeffabs, 3)@ 5830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d26,d0[4] @coeffabs_4 = vdup_lane_u8(coeffabs, 4)@ 5840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d27,d0[5] @coeffabs_5 = vdup_lane_u8(coeffabs, 5)@ 5850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d28,d0[6] @coeffabs_6 = vdup_lane_u8(coeffabs, 6)@ 5860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u8 d29,d0[7] @coeffabs_7 = vdup_lane_u8(coeffabs, 7)@ 5870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar blt core_loop_wd_4_16out @core loop wd 4 jump 5880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar str r0, [sp, #-4]! 5890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar str r1, [sp, #-4]! 
5900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bic r4,r5,#7 @r5 ->wd 5920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rsb r9,r4,r6,lsl #2 @r6->dst_strd r5 ->wd 5930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rsb r8,r4,r2,lsl #2 @r2->src_strd 5940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r6, r6, lsl #1 5950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r3, r5, lsr #3 @divide by 8 5960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mul r7, r3 @multiply height by width 5970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub r7, #4 @subtract by one for epilog 5980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarprolog_16out: 6000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar and r10, r0, #31 6020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r0,r2 @pu1_src_tmp += src_strd@ 6030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d1},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 6050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d0},[r0]! 
@src_tmp1 = vld1_u8(pu1_src_tmp)@ 6060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r4,r4,#8 6070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d2},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 6080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q4,d1,d23 @mul_res1 = vmull_u8(src_tmp2, coeffabs_1)@ 6090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d3},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 6100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d0,d22 @mul_res1 = vmlsl_u8(mul_res1, src_tmp1, coeffabs_0)@ 6110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d4},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 6120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d2,d24 @mul_res1 = vmlsl_u8(mul_res1, src_tmp3, coeffabs_2)@ 6130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d5},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 6140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d3,d25 @mul_res1 = vmlal_u8(mul_res1, src_tmp4, coeffabs_3)@ 6150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d6},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 6160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d4,d26 @mul_res1 = vmlal_u8(mul_res1, src_tmp1, coeffabs_4)@ 6170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d7},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 6180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d5,d27 @mul_res1 = vmlsl_u8(mul_res1, src_tmp2, coeffabs_5)@ 6190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d16},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 6200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d6,d28 @mul_res1 = vmlal_u8(mul_res1, src_tmp3, coeffabs_6)@ 6210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d17},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 
6220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d7,d29 @mul_res1 = vmlsl_u8(mul_res1, src_tmp4, coeffabs_7)@ 6230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar addle r0,r0,r8 6260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q5,d2,d23 @mul_res2 = vmull_u8(src_tmp3, coeffabs_1)@ 6270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bicle r4,r5,#7 @r5 ->wd 6290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d1,d22 @mul_res2 = vmlsl_u8(mul_res2, src_tmp2, coeffabs_0)@ 6300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d18},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 6320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d3,d24 @mul_res2 = vmlsl_u8(mul_res2, src_tmp4, coeffabs_2)@ 6330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r3] 6350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d4,d25 @mul_res2 = vmlal_u8(mul_res2, src_tmp1, coeffabs_3)@ 6360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r3, r2] 6370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d5,d26 @mul_res2 = vmlal_u8(mul_res2, src_tmp2, coeffabs_4)@ 6380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r3, r2, lsl #1] 6390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d6,d27 @mul_res2 = vmlsl_u8(mul_res2, src_tmp3, coeffabs_5)@ 6400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3, r3, r2 6410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d7,d28 @mul_res2 = vmlal_u8(mul_res2, src_tmp4, coeffabs_6)@ 
6420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r3, r2, lsl #1] 6430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d16,d29 @mul_res2 = vmlsl_u8(mul_res2, src_tmp1, coeffabs_7)@ 6440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r0,r2 @pu1_src_tmp += src_strd@ 6460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q6,d3,d23 6470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d1},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 6480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d2,d22 6490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d0},[r0]! @src_tmp1 = vld1_u8(pu1_src_tmp)@ 6500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d4,d24 6510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d2},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 6520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d5,d25 6530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d6,d26 6540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d7,d27 6550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d16,d28 6560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d17,d29 6570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r1,r6 6580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d8, d9},[r1]! 
@vst1_u8(pu1_dst,sto_res)@ 6590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d10,q5,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 6600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar addle r1,r1,r9,lsl #1 6610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q7,d4,d23 6630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r7,r7,#4 6640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d3,d22 6650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d5,d24 6660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d6,d25 6670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d3},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 6680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d7,d26 6690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d4},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 6700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d16,d27 6710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d5},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 6720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d17,d28 6730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d6},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 6740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d18,d29 6750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d7},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 6760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d10, d11},[r14],r6 @vst1_u8(pu1_dst_tmp,sto_res)@ 6780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d12,q6,#6 6790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
6810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar blt epilog_end_16out 6820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar beq epilog_16out @jumps to epilog 6830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarkernel_8_16out: 6850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r4,r4,#8 6870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q4,d1,d23 @mul_res1 = vmull_u8(src_tmp2, coeffabs_1)@ 6880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar addle r0,r0,r8 6900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d0,d22 @mul_res1 = vmlsl_u8(mul_res1, src_tmp1, coeffabs_0)@ 6910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d16},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 6930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d2,d24 @mul_res1 = vmlsl_u8(mul_res1, src_tmp3, coeffabs_2)@ 6940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d17},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 6960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d3,d25 @mul_res1 = vmlal_u8(mul_res1, src_tmp4, coeffabs_3)@ 6970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bicle r4,r5,#7 @r5 ->wd 6990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d4,d26 @mul_res1 = vmlal_u8(mul_res1, src_tmp1, coeffabs_4)@ 7000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d18},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 7020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar vmlsl.u8 q4,d5,d27 @mul_res1 = vmlsl_u8(mul_res1, src_tmp2, coeffabs_5)@ 7030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d12,d13},[r14],r6 7050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d6,d28 @mul_res1 = vmlal_u8(mul_res1, src_tmp3, coeffabs_6)@ 7060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r0,r2 @pu1_src_tmp += src_strd@ 7080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d7,d29 @mul_res1 = vmlsl_u8(mul_res1, src_tmp4, coeffabs_7)@ 7090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ and r11, r0, #31 7120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q5,d2,d23 @mul_res2 = vmull_u8(src_tmp3, coeffabs_1)@ 7130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d14,d15},[r14],r6 7150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d1,d22 @mul_res2 = vmlsl_u8(mul_res2, src_tmp2, coeffabs_0)@ 7160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r1,r6 7180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d3,d24 @mul_res2 = vmlsl_u8(mul_res2, src_tmp4, coeffabs_2)@ 7190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d0},[r0]! 
@src_tmp1 = vld1_u8(pu1_src_tmp)@ 7210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d4,d25 @mul_res2 = vmlal_u8(mul_res2, src_tmp1, coeffabs_3)@ 7220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d1},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 7240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d5,d26 @mul_res2 = vmlal_u8(mul_res2, src_tmp2, coeffabs_4)@ 7250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d8,d9},[r1]! @vst1_u8(pu1_dst,sto_res)@ 7270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d6,d27 @mul_res2 = vmlsl_u8(mul_res2, src_tmp3, coeffabs_5)@ 7280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar addle r1,r1,r9,lsl #1 7300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d7,d28 @mul_res2 = vmlal_u8(mul_res2, src_tmp4, coeffabs_6)@ 7310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ cmp r11, r10 7330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d16,d29 @mul_res2 = vmlsl_u8(mul_res2, src_tmp1, coeffabs_7)@ 7340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r10, r3, r2, lsl #3 @ 10*strd - 8+2 7360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q6,d3,d23 7370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r10, r10, r2 @ 11*strd 7390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d2,d22 7400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r10] @11+ 0 
7420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d4,d24 7430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r10, r2] @11+ 1*strd 7450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d5,d25 7460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r10, r2, lsl #1] @11+ 2*strd 7480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d6,d26 7490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r10, r10, r2 @12*strd 7510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d7,d27 7520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pld [r10, r2, lsl #1] @11+ 3*strd 7540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d16,d28 7550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ mov r10, r11 7570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d17,d29 7580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d2},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 7600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q7,d4,d23 7610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r7,r7,#4 7630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d3,d22 7640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d10, d11},[r14],r6 @vst1_u8(pu1_dst_tmp,sto_res)@ 7660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d5,d24 
7670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d3},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 7690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d6,d25 7700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d4},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 7720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d7,d26 7730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d5},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 7750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d16,d27 7760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d6},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 7780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d17,d28 7790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d7},[r3],r2 @src_tmp4 = vld1_u8(pu1_src_tmp)@ 7810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d18,d29 7820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bgt kernel_8_16out @jumps to kernel_8 7850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepilog_16out: 7870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q4,d1,d23 @mul_res1 = vmull_u8(src_tmp2, coeffabs_1)@ 7890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d0,d22 @mul_res1 = vmlsl_u8(mul_res1, src_tmp1, coeffabs_0)@ 
7900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d2,d24 @mul_res1 = vmlsl_u8(mul_res1, src_tmp3, coeffabs_2)@ 7910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d3,d25 @mul_res1 = vmlal_u8(mul_res1, src_tmp4, coeffabs_3)@ 7920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d4,d26 @mul_res1 = vmlal_u8(mul_res1, src_tmp1, coeffabs_4)@ 7930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d5,d27 @mul_res1 = vmlsl_u8(mul_res1, src_tmp2, coeffabs_5)@ 7940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d6,d28 @mul_res1 = vmlal_u8(mul_res1, src_tmp3, coeffabs_6)@ 7950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d7,d29 @mul_res1 = vmlsl_u8(mul_res1, src_tmp4, coeffabs_7)@ 7960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d12,d13},[r14],r6 7970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d14,q7,#6 7990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d16},[r3],r2 @src_tmp1 = vld1_u8(pu1_src_tmp)@ 8010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q5,d2,d23 @mul_res2 = vmull_u8(src_tmp3, coeffabs_1)@ 8020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d1,d22 @mul_res2 = vmlsl_u8(mul_res2, src_tmp2, coeffabs_0)@ 8030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d3,d24 @mul_res2 = vmlsl_u8(mul_res2, src_tmp4, coeffabs_2)@ 8040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d4,d25 @mul_res2 = vmlal_u8(mul_res2, src_tmp1, coeffabs_3)@ 8050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d5,d26 @mul_res2 = vmlal_u8(mul_res2, src_tmp2, coeffabs_4)@ 8060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d6,d27 @mul_res2 = 
vmlsl_u8(mul_res2, src_tmp3, coeffabs_5)@ 8070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q5,d7,d28 @mul_res2 = vmlal_u8(mul_res2, src_tmp4, coeffabs_6)@ 8080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q5,d16,d29 @mul_res2 = vmlsl_u8(mul_res2, src_tmp1, coeffabs_7)@ 8090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d14,d15},[r14],r6 8100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d8,q4,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 8120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d17},[r3],r2 @src_tmp2 = vld1_u8(pu1_src_tmp)@ 8140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q6,d3,d23 8150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d2,d22 8160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d4,d24 8170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d5,d25 8180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d6,d26 8190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d7,d27 8200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q6,d16,d28 8210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q6,d17,d29 8220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r1,r6 8230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d8,d9},[r1]! 
@vst1_u8(pu1_dst,sto_res)@ 8240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d10,q5,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 8250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u8 {d18},[r3],r2 @src_tmp3 = vld1_u8(pu1_src_tmp)@ 8270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q7,d4,d23 8280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d3,d22 8290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d5,d24 8300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d6,d25 8310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d7,d26 8320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d16,d27 8330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q7,d17,d28 8340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q7,d18,d29 8350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d10,d11},[r14],r6 @vst1_u8(pu1_dst_tmp,sto_res)@ 8370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d12,q6,#6 8380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepilog_end_16out: 8400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d12,d13},[r14],r6 8410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d14,q7,#6 8420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.8 {d14,d15},[r14],r6 8440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_loops_16out: 8470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tst r5,#7 
8480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r1, [sp], #4 8490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr r0, [sp], #4 8500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 851d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer ldmfdeq sp!,{r4-r12,r15} @reload the registers from sp 8520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r5, #4 8530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r0, r0, #8 8540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r1, r1, #16 8550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r7, #16 8560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r6, r6, lsr #1 8570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @ 8590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_wd_4_16out: 8610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rsb r9,r5,r6,lsl #2 @r6->dst_strd r5 ->wd 8620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rsb r8,r5,r2,lsl #2 @r2->src_strd 8630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmov.i8 d4,#0 8640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov r6, r6, lsl #1 8650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarouter_loop_wd_4_16out: 8670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r12,r5,#0 8680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ble end_inner_loop_wd_4_16out @outer loop jump 8690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarinner_loop_wd_4_16out: 8710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r0,r2 8720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d4[1]},[r3],r2 
@src_tmp1 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp1, 1)@ 8730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r12,r12,#4 8740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d5,d4[1] @src_tmp2 = vdup_lane_u32(src_tmp1, 1)@ 8750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d5[1]},[r3],r2 @src_tmp2 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp2, 1)@ 8760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d4[0]},[r0] @src_tmp1 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp1, 0)@ 8770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q0,d5,d23 @mul_res1 = vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1)@ 8780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d6,d5[1] @src_tmp3 = vdup_lane_u32(src_tmp2, 1)@ 8800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r0,r0,#4 8810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d6[1]},[r3],r2 @src_tmp3 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp3, 1)@ 8820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q0,d4,d22 @mul_res1 = vmlsl_u8(mul_res1, vreinterpret_u8_u32(src_tmp1), coeffabs_0)@ 8830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d7,d6[1] @src_tmp4 = vdup_lane_u32(src_tmp3, 1)@ 8850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d7[1]},[r3],r2 @src_tmp4 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp4, 1)@ 8860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q0,d6,d24 @mul_res1 = vmlsl_u8(mul_res1, vreinterpret_u8_u32(src_tmp3), coeffabs_2)@ 8870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q4,d7,d23 8890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d4,d7[1] @src_tmp1 = 
vdup_lane_u32(src_tmp4, 1)@ 8900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmull.u8 q1,d7,d25 @mul_res2 = vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3)@ 8910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d4[1]},[r3],r2 @src_tmp1 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp1, 1)@ 8920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d6,d22 8930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q0,d4,d26 @mul_res1 = vmlal_u8(mul_res1, vreinterpret_u8_u32(src_tmp1), coeffabs_4)@ 8940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d5,d4[1] @src_tmp2 = vdup_lane_u32(src_tmp1, 1)@ 8960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d4,d24 8970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d5[1]},[r3],r2 @src_tmp2 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp2, 1)@ 8980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q1,d5,d27 @mul_res2 = vmlsl_u8(mul_res2, vreinterpret_u8_u32(src_tmp2), coeffabs_5)@ 8990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d6,d5[1] @src_tmp3 = vdup_lane_u32(src_tmp2, 1)@ 9010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d5,d25 9020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d6[1]},[r3],r2 @src_tmp3 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp3, 1)@ 9030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q0,d6,d28 @mul_res1 = vmlal_u8(mul_res1, vreinterpret_u8_u32(src_tmp3), coeffabs_6)@ 9040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d7,d6[1] @src_tmp4 = vdup_lane_u32(src_tmp3, 1)@ 9060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d6,d26 
9070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d7[1]},[r3],r2 @src_tmp4 = vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp4, 1)@ 9080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q1,d7,d29 @mul_res2 = vmlsl_u8(mul_res2, vreinterpret_u8_u32(src_tmp4), coeffabs_7)@ 9090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d4,d7[1] 9110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vadd.i16 q0,q0,q1 @mul_res1 = vaddq_u16(mul_res1, mul_res2)@ 9120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d7,d27 9140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d4[1]},[r3],r2 9150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlal.u8 q4,d4,d28 9160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vdup.u32 d5,d4[1] 9170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d0,q0,#6 @sto_res = vqmovun_s16(sto_res_tmp)@ 9180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vld1.u32 {d5[1]},[r3] 9200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r3,r1,r6 9210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.32 {d0},[r1]! 
@vst1_lane_u32((uint32_t *)pu1_dst, vreinterpret_u32_u8(sto_res), 0)@ 9220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vmlsl.u8 q4,d5,d29 9240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.32 {d1},[r3],r6 @vst1_lane_u32((uint32_t *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1)@ 9250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @vqrshrun.s16 d8,q4,#6 9260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.32 {d8},[r3],r6 9280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @add r1,r1,#4 9290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1.32 {d9},[r3] 9300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bgt inner_loop_wd_4_16out 9310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_inner_loop_wd_4_16out: 9330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar subs r7,r7,#4 9340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r1,r1,r9,lsl #1 9350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r0,r0,r8 9360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bgt outer_loop_wd_4_16out 9370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldmfd sp!, {r4-r12, r15} @reload the registers from sp 9390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
9470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 948