10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/*****************************************************************************
20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Licensed under the Apache License, Version 2.0 (the "License");
60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* you may not use this file except in compliance with the License.
70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* You may obtain a copy of the License at:
80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* http://www.apache.org/licenses/LICENSE-2.0
100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Unless required by applicable law or agreed to in writing, software
120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* distributed under the License is distributed on an "AS IS" BASIS,
130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* See the License for the specific language governing permissions and
150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* limitations under the License.
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*****************************************************************************/
180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/**
190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @file
210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  ihevc_inter_pred_chroma_vert_neon.s
220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief
240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  contains function definitions for inter prediction  interpolation.
250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* functions are coded using neon assembly instructions and are written in
270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* gnu assembler syntax ('@' comments, .globl/.type directives)
280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @author
300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  yogeswaran rs
310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par list of functions:
330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks
360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  none
370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/**
420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief
450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*   chroma interprediction filter for vertical input
460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par description:
480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*    applies a vertical filter with coefficients pointed to  by 'pi1_coeff' to
490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*    the elements pointed by 'pu1_src' and  writes to the location pointed by
500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*    'pu1_dst'. the output is down shifted by 6 and clipped to 8 bits.
510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*    assumptions : the function is optimized assuming that the width is a
520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*    multiple of 2, 4 or 8 and that the height is a multiple of 2.
530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*    widths of 4 and 8 are optimized further
540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] pu1_src
560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  uword8 pointer to the source
570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[out] pu1_dst
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  uword8 pointer to the destination
600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] src_strd
620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  integer source stride
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] dst_strd
650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  integer destination stride
660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] pi1_coeff
680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  word8 pointer to the filter coefficients
690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] ht
710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  integer height of the array
720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] wd
740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  integer width of the array
750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @returns
770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks
790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  none
800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/
830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@void ihevc_inter_pred_chroma_vert(uword8 *pu1_src,
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                                   uword8 *pu1_dst,
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                                   word32 src_strd,
860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                                   word32 dst_strd,
870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                                   word8 *pi1_coeff,
880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                                   word32 ht,
890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                                   word32 wd)
900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************variables vs registers*****************************************
910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r0 => *pu1_src
920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r1 => *pu1_dst
930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r2 =>  src_strd
940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r3 =>  dst_strd
950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text
960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.align 4
970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_inter_pred_chroma_vert_a9q
1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.type ihevc_inter_pred_chroma_vert_a9q, %function
1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_inter_pred_chroma_vert_a9q:
1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    stmfd       sp!,{r4-r12,r14}            @stack stores the values of the arguments
1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r4,[sp,#44]                 @loads ht
1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r12,[sp,#40]                @loads pi1_coeff
1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         r4,#0                       @checks ht == 0
1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r6,[sp,#48]                 @loads wd
1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r0,r0,r2                    @pu1_src - src_strd
1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d0},[r12]                  @loads pi1_coeff
1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ble         end_loops                   @jumps to end
1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tst         r6,#3                       @checks (wd & 3)
1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vabs.s8     d3,d0                       @vabs_s8(coeff)
1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r10,r6,#1                   @2*wd
1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d0,d3[0]                    @coeffabs_0
1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d1,d3[1]                    @coeffabs_1
1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d2,d3[2]                    @coeffabs_2
1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d3,d3[3]                    @coeffabs_3
1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         outer_loop_wd_2             @jumps to loop handling wd ==2
1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tst         r4,#7                       @checks ht for mul of 8
1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         core_loop_ht_8              @when height is multiple of 8
1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r7,r3,#1                    @2*dst_strd
1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r9,r7,r10                   @2*dst_strd - 2wd
1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r12,r2,#1                   @2*src_strd
1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r8,r12,r10                  @2*src_strd - 2wd
1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r5,r10                      @2wd
1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarinner_loop_ht_2:                            @called when wd is multiple of 4 and ht is 4,2
1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r0,r2                    @pu1_src +src_strd
1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d9},[r6],r2                @loads pu1_src
1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r5,r5,#8                    @2wd - 8
1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d5},[r0]!                  @loads src
1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q3,d9,d1                    @vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1)
1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d4},[r6],r2                @loads incremented src
1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q3,d5,d0                    @vmlsl_u8(mul_res1, vreinterpret_u8_u32(src_tmp1), coeffabs_0)
1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d8},[r6],r2                @loads incremented src
1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q3,d4,d2                    @vmlal_u8(mul_res1, vreinterpret_u8_u32(src_tmp3), coeffabs_2)
1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q2,d4,d1
1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q3,d8,d3
1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q2,d9,d0
1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d10},[r6]                  @loads the incremented src
1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q2,d8,d2
1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d6,q3,#6                   @shifts right
1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q2,d10,d3
1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r1,r3                    @pu1_dst + dst_strd
1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d4,q2,#6                   @shifts right
1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d6},[r1]!                  @stores the loaded value
1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d4},[r6]                   @stores the loaded value
1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         inner_loop_ht_2             @inner loop again
1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r4,r4,#2                    @ht - 2
1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r1,r1,r9                    @pu1_dst += (2*dst_strd - 2wd)
1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r5,r10                      @2wd
1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r0,r0,r8                    @pu1_src += (2*src_strd - 2wd)
1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         inner_loop_ht_2             @loop again
1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_loops                   @jumps to end
1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarouter_loop_wd_2:                            @called when width is multiple of 2
1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r5,r3,#1                    @2*dst_strd
1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r12,r10                     @2wd
1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r9,r5,r10                   @2*dst_strd - 2wd
1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r7,r2,#1                    @2*src_strd
1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r8,r7,r10                   @2*src_strd - 2wd
1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarinner_loop_wd_2:
1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r0,r2                    @pu1_src + src_strd
1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.32     {d6[0]},[r0]                @vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp1, 0
1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r12,r12,#4                  @2wd - 4
1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r0,r0,#4                    @pu1_src + 4
1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.32     {d6[1]},[r6],r2             @loads pu1_src_tmp
1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.32     d7,d6[1]
1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.32     {d7[1]},[r6],r2             @loads pu1_src_tmp
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q2,d7,d1                    @vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1)
1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.32     d7,d7[1]
1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.32     {d7[1]},[r6],r2
1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q2,d6,d0
1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q2,d7,d2
1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.32     d7,d7[1]
1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.32     {d7[1]},[r6]
1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r1,r3                    @pu1_dst + dst_strd
1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q2,d7,d3
1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d4,q2,#6                   @vrshrq_n_s16(vreinterpretq_s16_u16(mul_res1),6)
1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.32     {d4[0]},[r1]                @stores the loaded value
1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r1,r1,#4                    @pu1_dst += 4
2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.32     {d4[1]},[r6]                @stores the loaded value
2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         inner_loop_wd_2             @inner loop again
2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    @inner loop ends
2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r4,r4,#2                    @ht - 2
2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r1,r1,r9                    @pu1_dst += 2*dst_strd - 2*wd
2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r12,r10                     @2wd
2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r0,r0,r8                    @pu1_src += 2*src_strd - 2*wd
2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         inner_loop_wd_2             @loop again
2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_loops                   @jumps to end
2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_ht_8:                             @when wd & ht is multiple of 8
2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r12,r3,#2                   @4*dst_strd
2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r8,r12,r10                  @4*dst_strd - 2wd
2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r12,r2,#2                   @4*src_strd
2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r9,r12,r10                  @4*src_strd - 2wd
2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bic         r5,r10,#7                   @r5 ->wd
2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r14,r10,lsr #3              @divide by 8
2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mul         r12,r4,r14                  @multiply height by width
2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r12,#4                      @subtract by one for epilog
2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarprolog:
2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r0,r2                    @pu1_src + src_strd
2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d5},[r6],r2                @loads pu1_src
2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r5,r5,#8                    @2wd - 8
2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d4},[r0]!                  @loads the source
2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d6},[r6],r2                @load and increment
2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q15,d5,d1                   @mul with coeff 1
2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d7},[r6],r2                @load and increment
2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q15,d4,d0
2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r7,r1,r3                    @pu1_dst
2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q15,d6,d2
2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q15,d7,d3
2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d8},[r6],r2                @load and increment
2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q14,d6,d1                   @mul_res 2
2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r0,r0,r9                    @pu1_dst += 4*dst_strd - 2*wd
2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q14,d5,d0
2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bicle       r5,r10,#7                   @r5 ->wd
2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q14,d7,d2
2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d9},[r6],r2
2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q14,d8,d3
2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d30,q15,#6
2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d10},[r6],r2
2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q13,d7,d1
2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r0,r2                    @pu1_src + src_strd
2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q13,d6,d0
2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d30},[r1]!                 @stores the loaded value
2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q13,d8,d2
2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d4},[r0]!                  @loads the source
2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q13,d9,d3
2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d28,q14,#6
2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r1,r1,r8                    @pu1_src += 4*src_strd - 2*wd
2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q12,d8,d1
2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d5},[r6],r2                @loads pu1_src
2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q12,d7,d0
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r12,r12,#4
2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d6},[r6],r2                @load and increment
2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q12,d9,d2
2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d7},[r6],r2                @load and increment
2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q12,d10,d3
2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r11,r2,#2
2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d28},[r7],r3               @stores the loaded value
2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d26,q13,#6
2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rsb         r11,r2,r2,lsl #3
2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r14,r2,r2,lsl #1
2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r14,r14,r11
2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ble         epilog                      @jumps to epilog
2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarkernel_8:
2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q15,d5,d1                   @mul with coeff 1
2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r5,r5,#8                    @2wd - 8
2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q15,d4,d0
2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r0,r0,r9                    @pu1_dst += 4*dst_strd - 2*wd
2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q15,d6,d2
2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rsble       r11,r2,r2,lsl #3
2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q15,d7,d3
2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d26},[r7],r3               @stores the loaded value
2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d24,q12,#6
2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d8},[r6],r2                @load and increment
2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q14,d6,d1                   @mul_res 2
2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bicle       r5,r10,#7                   @r5 ->wd
2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q14,d5,d0
2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d24},[r7],r3               @stores the loaded value
2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q14,d7,d2
2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d9},[r6],r2
2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d30,q15,#6
3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q14,d8,d3
3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d10},[r6],r2
3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r7,r1,r3                    @pu1_dst
3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q13,d7,d1
3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r0,r2                    @pu1_src + src_strd
3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    pld         [r0,r11]
3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q13,d6,d0
3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d4},[r0]!                  @loads the source
3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q13,d8,d2
3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d30},[r1]!                 @stores the loaded value
3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q13,d9,d3
3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d5},[r6],r2                @loads pu1_src
3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r11,r11,r2
3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d28,q14,#6
3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q12,d8,d1
3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d6},[r6],r2                @load and increment
3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r1,r1,r8                    @pu1_src += 4*src_strd - 2*wd
3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         r11,r14
3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rsbgt       r11,r2,r2,lsl #3
3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q12,d7,d0
3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r12,r12,#4
3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q12,d9,d2
3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d7},[r6],r2                @load and increment
3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q12,d10,d3
3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d28},[r7],r3               @stores the loaded value
3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d26,q13,#6
3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         kernel_8                    @jumps to kernel_8
3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepilog:                                     @drain stage after the kernel_8 loop: flush the two results still in flight, then filter and store four more 8-byte rows
3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                            @expects d4-d7 = source rows already loaded, q12 = sum awaiting narrowing, d26 = row awaiting its store, d0-d3 = the four tap multipliers
3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q15,d5,d1                   @q15 = d5*d1 (widening u8 multiply, first term of the 4-tap sum)
3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q15,d4,d0                   @q15 -= d4*d0
3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q15,d6,d2                   @q15 += d6*d2
3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q15,d7,d3                   @q15 -= d7*d3
3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d26},[r7],r3               @store the pending narrowed row d26 at r7, then r7 += r3
3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d24,q12,#6                 @d24 = q12 narrowed to u8: rounding shift right by 6 with unsigned saturation
3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                            @next output row: the taps slide down one source row (d5,d6,d7,d8)
3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d8},[r6],r2                @d8 = next 8 source bytes at r6, then r6 += r2
3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q14,d6,d1                   @q14 = d6*d1
3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q14,d5,d0                   @q14 -= d5*d0
3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q14,d7,d2                   @q14 += d7*d2
3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q14,d8,d3                   @q14 -= d8*d3
3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d24},[r7],r3               @store the narrowed row d24 at r7, then r7 += r3
3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d30,q15,#6                 @d30 = q15 narrowed to u8 (same rounding shift by 6)
3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                            @third row uses d6,d7,d8,d9; r7 is re-based from r1 before r1 is advanced by its store
3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d9},[r6],r2                @d9 = next 8 source bytes at r6, then r6 += r2
3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q13,d7,d1                   @q13 = d7*d1
3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r7,r1,r3                    @r7 = r1 + r3 (r3 is the step applied after every store through r7)
3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q13,d6,d0                   @q13 -= d6*d0
3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d30},[r1]!                 @store d30 at r1, then r1 += 8 (writeback by the transfer size)
3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d28,q14,#6                 @d28 = q14 narrowed to u8
3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q13,d8,d2                   @q13 += d8*d2
3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d10},[r6],r2               @d10 = next 8 source bytes at r6, then r6 += r2
3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q13,d9,d3                   @q13 -= d9*d3
3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                            @fourth row uses d7,d8,d9,d10 and reuses q12 as its accumulator
3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q12,d8,d1                   @q12 = d8*d1
3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d26,q13,#6                 @d26 = q13 narrowed to u8
3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d28},[r7],r3               @store d28 at r7, then r7 += r3
3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q12,d7,d0                   @q12 -= d7*d0
3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q12,d9,d2                   @q12 += d9*d2
3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d26},[r7],r3               @store d26 at r7, then r7 += r3
3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlsl.u8    q12,d10,d3                  @q12 -= d10*d3
3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqrshrun.s16 d24,q12,#6                 @d24 = q12 narrowed to u8
3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d24},[r7],r3               @store the last row d24 at r7, then r7 += r3
3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_loops:                                  @exit point; epilog falls through to here. NOTE(review): d8-d10 are written in the code above and are callee-saved under AAPCS, yet no restore is visible in this part of the file - confirm the entry code or callers account for that
3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldmfd       sp!,{r4-r12,r15}            @pop r4-r12 and load pc from the stack, which leaves the routine (presumably pairs with a push of r4-r12 and lr at entry - TODO confirm)
3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
384