10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/*****************************************************************************
20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Licensed under the Apache License, Version 2.0 (the "License");
60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* you may not use this file except in compliance with the License.
70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* You may obtain a copy of the License at:
80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* http://www.apache.org/licenses/LICENSE-2.0
100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Unless required by applicable law or agreed to in writing, software
120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* distributed under the License is distributed on an "AS IS" BASIS,
130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* See the License for the specific language governing permissions and
150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* limitations under the License.
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*****************************************************************************/
180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/**
190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @file
210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  ihevc_intra_pred_chroma_mode_19_to_25.s
220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief
240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  contains function definitions for chroma intra prediction (angular modes 19 to 25).
250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* functions are coded using neon assembly and can be compiled using
260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* rvct
280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @author
300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  naveen sr
310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par list of functions:
330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  - ihevc_intra_pred_chroma_mode_19_to_25_a9q()
340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks
360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  none
370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/**
410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief
440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*    chroma intra prediction for angular modes 19 to 25
450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par description:
470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] pu1_ref
490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  uword8 pointer to the source
500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[out] pu1_dst
520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  uword8 pointer to the destination
530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] src_strd
550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  integer source stride
560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] dst_strd
580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  integer destination stride
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] nt
610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  size of transform block
620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] mode
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  intra prediction mode (19 to 25)
650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @returns
670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks
690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  none
700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/
730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@void ihevc_intra_pred_chroma_mode_19_to_25(uword8* pu1_ref,
750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               word32 src_strd,
760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               uword8* pu1_dst,
770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               word32 dst_strd,
780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               word32 nt,
790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               word32 mode)
800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@
810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************variables vs registers*****************************************
820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r0 => *pu1_ref
830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r1 => src_strd
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r2 => *pu1_dst
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r3 => dst_strd
860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@stack contents from [sp, #40] (after r4-r12 and r14 are pushed at function entry)
880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   nt
890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   mode
900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ the offset words below and the function body are both emitted into the text section
910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text
@ on arm targets gnu as treats the .align operand as a power of two, so this requests 16-byte alignment
920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.align 4
930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ entry point made visible to the linker
970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_intra_pred_chroma_mode_19_to_25_a9q
@ lookup tables referenced by this file but not defined in it
980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gai4_ihevc_ang_table
990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gai4_ihevc_inv_ang_table
1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gau1_ihevc_planar_factor
1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ pc-relative offset words.
@ each word stores (table - ulblN - 8). the function loads the word into a
@ register and then executes "add rX,rX,pc" at label ulblN. in arm state pc
@ reads as the address of that instruction plus 8, so the sum is the run-time
@ address of the table. every word is tied to exactly one ulblN label and must
@ only be consumed by the add at that label.
@
@ offset of gai4_ihevc_inv_ang_table, resolved at ulbl1 (into r8)
1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargai4_ihevc_inv_ang_table_addr:
1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gai4_ihevc_inv_ang_table - ulbl1 - 8
1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ offset of gau1_ihevc_planar_factor, resolved at ulbl2 (into r1)
1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargau1_ihevc_planar_factor_addr:
1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gau1_ihevc_planar_factor - ulbl2 - 8
1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ offset of gai4_ihevc_ang_table, resolved at ulbl3 (into r7, before the reference copy)
1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargai4_ihevc_ang_table_addr_1:
1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gai4_ihevc_ang_table - ulbl3 - 8
1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ offset of gai4_ihevc_ang_table again, resolved at ulbl4 (into r6, in linear_filtering);
@ a second word is needed because the offset is relative to a different label
1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargai4_ihevc_ang_table_addr_2:
1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gai4_ihevc_ang_table - ulbl4 - 8
1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ mark the entry symbol as a function in the object file's symbol table
1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.type ihevc_intra_pred_chroma_mode_19_to_25_a9q, %function
1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_intra_pred_chroma_mode_19_to_25_a9q:
1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    stmfd       sp!, {r4-r12, r14}          @stack stores the values of the arguments
1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r4,[sp,#40]                 @loads nt
1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r7, gai4_ihevc_ang_table_addr_1
1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl3:
1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r7,r7,pc
1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r5,[sp,#44]                 @mode (19 to 25)
1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r8, gai4_ihevc_inv_ang_table_addr
1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl1:
1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r8,r8,pc
1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r7, r7, r5, lsl #2          @gai4_ihevc_ang_table[mode]
1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r8, r8, r5, lsl #2          @gai4_ihevc_inv_ang_table
1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r8, r8, #48                 @gai4_ihevc_inv_ang_table[mode - 12]
1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r7, [r7]                    @intra_pred_ang
1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         sp, sp, #132                @ref_temp[2 * max_cu_size + 2]
1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r8, [r8]                    @inv_ang
1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6, sp, r4 , lsl #1         @ref_temp + 2 * nt
1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mul         r9, r4, r7                  @nt*intra_pred_ang
1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r6, r6, #2                  @ref_temp + 2*nt - 2
1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r1, r0, r4, lsl #2          @r1 = &src[4nt]
1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d30, r7                     @intra_pred_ang
1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r7, r4
1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    asr         r9, r9, #5
1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.32     d0,[r1]!                    @ pu1_ref[two_nt + k]
1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.32     d0,[r6]!                    @ref_temp[k + nt - 1] = pu1_ref[two_nt + k]@
1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r7, r7, #4
1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         end_loop_copy
1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r7,r7,#4
1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         loop_copy_8
1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r7,r7,#8
1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         loop_copy_16
1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarloop_copy_32:
1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d0,d1,d2,d3},[r1]!
1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d4,d5,d6},[r1]!
1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d0,d1,d2,d3},[r6]!
1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d4,d5,d6},[r6]!
1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_loop_copy
1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarloop_copy_16:
1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d0,d1,d2},[r1]!
1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d0,d1,d2},[r6]!
1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_loop_copy
1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarloop_copy_8:
1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      d0,[r1]!
1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d0,[r6]!
1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_loop_copy:
1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldrh        r11, [r1]
1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    strh        r11, [r6]
1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         r9, #-1
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bge         linear_filtering
1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6, sp, r4 ,lsl #1          @ref_temp + 2 * nt
1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r6, r6, #4                  @ref_temp + 2 * nt - 2 - 2
1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r12, #0xffffffff
1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rsb         r9, r9, r12                 @count to take care off ref_idx
1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r1, r0, r4, lsl #2          @r1 = &src[2nt]
1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r7, #128                    @inv_ang_sum
2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarloop_copy_ref_idx:
2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r7, r7, r8                  @inv_ang_sum += inv_ang
2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r0,r7, lsr #8
2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r0,r0, lsl #1
2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldrh        r11, [r1, -r0]
2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    strh        r11, [r6], #-2
2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r9, r9, #1
2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bne         loop_copy_ref_idx
2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarlinear_filtering:
2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   after copy
2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   below code is taken from mode 27 to 33 and modified
2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r6,gai4_ihevc_ang_table_addr_2 @loads word32 gai4_ihevc_ang_table[35]
2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl4:
2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r6,pc
2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r7,r4,#2                    @four_nt
2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r8,r6,r5,lsl #2             @*gai4_ihevc_ang_table[mode]
2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r9,[r8]                     @intra_pred_ang = gai4_ihevc_ang_table[mode]
2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r1,gau1_ihevc_planar_factor_addr @used for ((row + 1) * intra_pred_ang) row values
2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl2:
2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r1,r1,pc
2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6,r1,#1
2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r8, sp, r4, lsl #1          @ref_temp + 2 * nt
2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r8,#2                       @ref_temp + 2*nt -2
2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         lr,#0                       @row
2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r12,r4
2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         r4,r4,#1
2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_8:
2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r8,r8,#2                    @pu1_ref_main_idx += (four_nt + 1)
2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d0,r9                       @intra_pred_ang
2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r12,r4,lsr #4               @divide by 8
2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d1,#32
2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mul         r7,r4,r12
2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i16    q3,#31
2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r1,r8
2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r5,r4
2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r11,#2
2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarprologue:
2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d3},[r6]                   @loads the row value
2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.s8    q1,d3,d0                    @pos = ((row + 1) * intra_pred_ang)
2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vand        q2,q1,q3                    @dup_const_fract(fract = pos & (31))
2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmovn.i16   d4,q2
2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshrn.s16   d5,q1,#5                    @idx = pos >> 5
2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshl.s8     d5,d5,#1
2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d31,d4[0]
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r0,r2,r3
2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.u32    lr,d5[0]                    @(i row)extract idx to the r register
2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   lsl         lr,lr,#1
2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d29,d4[1]                   @(ii)
2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#0,#8
2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(i row)*pu1_ref[ref_main_idx]
2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d8},[r10],r11              @(i row)ref_main_idx
2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#8,#8
2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d9},[r10]                  @(i row)ref_main_idx_1
2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r8,r9                   @(ii)*pu1_ref[ref_main_idx]
2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#16,#8
2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d30,d1,d31                  @32-fract(dup_const_32_fract)
2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(iii)*pu1_ref[ref_main_idx]
2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d12},[r12],r11             @(ii)ref_main_idx
2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q5,d8,d30                   @(i row)vmull_u8(ref_main_idx, dup_const_32_fract)
2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d13},[r12]                 @(ii)ref_main_idx_1
2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q5,d9,d31                   @(i row)vmull_u8(ref_main_idx_1, dup_const_fract)
2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d27,d4[2]                   @(iii)
2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d28,d1,d29                  @(ii)32-fract(dup_const_32_fract)
2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#24,#8
2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d25,d4[3]                   @(iv)
2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q7,d12,d28                  @(ii)vmull_u8(ref_main_idx, dup_const_32_fract)
2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r8,r9                   @(iv)*pu1_ref[ref_main_idx]
2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d16},[r10],r11             @(iii)ref_main_idx
2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q7,d13,d29                  @(ii)vmull_u8(ref_main_idx_1, dup_const_fract)
2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d17},[r10]                 @(iii)ref_main_idx_1
3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d10,q5,#5                   @(i row)shift_res = vrshrn_n_u16(add_res, 5)
3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d20},[r12],r11             @(iv)ref_main_idx
3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d26,d1,d27                  @(iii)32-fract(dup_const_32_fract)
3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d21},[r12]                 @(iv)ref_main_idx_1
3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d31,d4[4]                   @(v)
3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9,d16,d26                  @(iii)vmull_u8(ref_main_idx, dup_const_32_fract)
3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.u32    lr,d5[1]                    @extract idx to the r register
3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9,d17,d27                  @(iii)vmull_u8(ref_main_idx_1, dup_const_fract)
3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   lsl         lr,lr,#1
3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d10},[r2]!                 @(i row)
3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d14,q7,#5                   @(ii)shift_res = vrshrn_n_u16(add_res, 5)
3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#0,#8
3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d29,d4[5]                   @(vi)
3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(v)*pu1_ref[ref_main_idx]
3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d8},[r10],r11              @(v)ref_main_idx
3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d24,d1,d25                  @(iv)32-fract(dup_const_32_fract)
3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11,d20,d24                 @(iv)vmull_u8(ref_main_idx, dup_const_32_fract)
3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#8,#8
3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d9},[r10]                  @(v)ref_main_idx_1
3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11,d21,d25                 @(iv)vmull_u8(ref_main_idx_1, dup_const_fract)
3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d14},[r0],r3               @(ii)
3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18,q9,#5                   @(iii)shift_res = vrshrn_n_u16(add_res, 5)
3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r8,r9                   @(vi)*pu1_ref[ref_main_idx]
3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d27,d4[6]                   @(vii)
3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#16,#8
3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d30,d1,d31                  @(v)32-fract(dup_const_32_fract)
3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(vii)*pu1_ref[ref_main_idx]
3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d12},[r12],r11             @(vi)ref_main_idx
3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q5,d8,d30                   @(v)vmull_u8(ref_main_idx, dup_const_32_fract)
3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d13},[r12]                 @(vi)ref_main_idx_1
3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q5,d9,d31                   @(v)vmull_u8(ref_main_idx_1, dup_const_fract)
3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d18},[r0],r3               @(iii)
3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22,q11,#5                  @(iv)shift_res = vrshrn_n_u16(add_res, 5)
3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d25,d4[7]                   @(viii)
3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#24,#8
3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d16},[r10],r11             @(vii)ref_main_idx
3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d28,d1,d29                  @(vi)32-fract(dup_const_32_fract)
3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d17},[r10]                 @(vii)ref_main_idx_1
3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q7,d12,d28                  @(vi)vmull_u8(ref_main_idx, dup_const_32_fract)
3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r8,r9                   @(viii)*pu1_ref[ref_main_idx]
3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q7,d13,d29                  @(vi)vmull_u8(ref_main_idx_1, dup_const_fract)
3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r7,r7,#8
3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d22},[r0],r3               @(iv)
3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         r4,#8                       @ go to end if 4x4
3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         end_loops
3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d10,q5,#5                   @(v)shift_res = vrshrn_n_u16(add_res, 5)
3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d20},[r12],r11             @(viii)ref_main_idx
3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d26,d1,d27                  @(vii)32-fract(dup_const_32_fract)
3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d21},[r12]                 @(viii)ref_main_idx_1
3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9,d16,d26                  @(vii)vmull_u8(ref_main_idx, dup_const_32_fract)
3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addgt       r8,r8,#8
3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9,d17,d27                  @(vii)vmull_u8(ref_main_idx_1, dup_const_fract)
3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subgt       r4,r4,#8
3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d10},[r0],r3               @(v)
3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d14,q7,#5                   @(vi)shift_res = vrshrn_n_u16(add_res, 5)
3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         epilogue
3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d5},[r6]                   @loads the row value
3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.s8    q1,d5,d0                    @pos = ((row + 1) * intra_pred_ang)
3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vand        q2,q1,q3                    @dup_const_fract(fract = pos & (31))
3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmovn.i16   d4,q2
3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshrn.s16   d3,q1,#5                    @idx = pos >> 5
3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshl.s8     d3,d3,#1
3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.u32    lr,d3[0]                    @(i)extract idx to the r register
3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   lsl         lr,lr,#1
3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#0,#8
3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(i)*pu1_ref[ref_main_idx]
3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ kernel_8_rows: steady-state loop. Each pass stores eight rows of eight bytes.
@
@ Per row the arithmetic is
@     out = vrshrn(ref_main_idx * (d1 - fract) + ref_main_idx_1 * fract, 5)
@ i.e. a rounding right shift by 5 of the 16-bit weighted sum, narrowed to bytes.
@ d1 is presumably the constant 32 and q3 the mask 31 (see the "32-fract" and
@ "pos & (31)" comments); both are set up outside this loop - TODO confirm.
@
@ The loop is software-pipelined:
@   - rows (vi),(vii),(viii) stored in the first half belong to the PREVIOUS
@     block (prepared by the previous pass, or by the code ahead of the loop
@     on the first pass);
@   - rows (i)-(v) stored in the second half belong to the CURRENT block;
@   - row (vi) of the current block is narrowed into d14 at the bottom, and
@     rows (vii)/(viii) are left half-computed in q9 and d20/d21/d25, for the
@     next pass or for the epilogue to finish.
@   - pos/fract/idx for the NEXT block are computed in between (q1, q2, d3, d4).
@
@ Register use as seen in this loop:
@   r0      row store pointer, post-incremented by r3 after most stores
@   r2      address of row (i) of the current 8-byte column block; advanced by
@           8 through writeback when row (i) is stored
@   r3      byte step between rows of the destination
@   r4      column countdown, reloaded from r5 when it reaches <= 0
@   r6      pointer to the row values loaded into d5
@   r7      overall countdown; the loop exits when it reaches zero
@   r8      reference base pointer (reset from r1 at the end of a row band)
@   r9      one sign-extended byte offset extracted from lr
@   r10/r12 alternating load addresses (r8 + r9)
@   r11     post-increment between the two loads of a row (set outside this
@           loop - value not visible here)
@   lr      four packed signed 8-bit offsets taken from d3[0] or d3[1]
@   d0      multiplier for the row values (intra_pred_ang per the comments)
@   d3/d4   doubled idx bytes / fract bytes for the eight rows of a block
@
@ Flags: `subs r4,r4,#8` near the top sets the condition flags that every
@ le/gt-conditional instruction below relies on. Nothing up to the final
@ `subs r7,r7,#8` changes them, so no flag-setting instruction may be inserted
@ in between.
3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarkernel_8_rows:
3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d31,d4[0]
@ le after this subs means the current row band is finished with this pass
3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r4,r4,#8
3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#8,#8
3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d8},[r10],r11              @(i)ref_main_idx
4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d24,d1,d25                  @(viii)32-fract(dup_const_32_fract)
4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r6,r6,#8                    @increment the row value
4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r8,r9                   @(ii)*pu1_ref[ref_main_idx]
4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d9},[r10]                  @(i)ref_main_idx_1
4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11,d20,d24                 @(viii)vmull_u8(ref_main_idx, dup_const_32_fract)
4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ row values for the NEXT block (r6 was conditionally advanced just above)
4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d5},[r6]                   @loads the row value
4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11,d21,d25                 @(viii)vmull_u8(ref_main_idx_1, dup_const_fract)
4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d29,d4[1]                   @(ii)
4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18,q9,#5                   @(vii)shift_res = vrshrn_n_u16(add_res, 5)
4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#16,#8
4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ d14 must be stored before q7 (= d14:d15) is reused for row (ii) below
4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d14},[r0],r3               @(vi)
4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d30,d1,d31                  @(i)32-fract(dup_const_32_fract)
4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(iii)*pu1_ref[ref_main_idx]
4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d12},[r12],r11             @(ii)ref_main_idx
4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q5,d8,d30                   @(i)vmull_u8(ref_main_idx, dup_const_32_fract)
4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d13},[r12]                 @(ii)ref_main_idx_1
4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q5,d9,d31                   @(i)vmull_u8(ref_main_idx_1, dup_const_fract)
4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#24,#8
4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movle       r4,r5                       @reload nt
4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ lr <- offsets for rows (v)-(viii) of the current block. This has to happen
@ before the vmull.s8 into q1 further down, because q1 aliases d2:d3.
4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.u32    lr,d3[1]                    @extract idx to the r register
4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22,q11,#5                  @(viii)shift_res = vrshrn_n_u16(add_res, 5)
4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d27,d4[2]                   @(iii)
4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d28,d1,d29                  @(ii)32-fract(dup_const_32_fract)
4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r8,r9                   @(iv)*pu1_ref[ref_main_idx]
4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d16},[r10],r11             @(iii)ref_main_idx
4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q7,d12,d28                  @(ii)vmull_u8(ref_main_idx, dup_const_32_fract)
4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d18},[r0],r3               @(vii)
4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q7,d13,d29                  @(ii)vmull_u8(ref_main_idx_1, dup_const_fract)
4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d17},[r10]                 @(iii)ref_main_idx_1
4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d10,q5,#5                   @(i)shift_res = vrshrn_n_u16(add_res, 5)
4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d25,d4[3]                   @(iv)
@ pos for the NEXT block; overwrites d2:d3 (old idx bytes are already in lr)
4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.s8    q1,d5,d0                    @pos = ((row + 1) * intra_pred_ang)
4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ last row of the previous block: no post-increment, r0 is re-derived from r2
4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d22},[r0]                  @(viii)
4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d26,d1,d27                  @(iii)32-fract(dup_const_32_fract)
4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d20},[r12],r11             @(iv)ref_main_idx
4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9,d16,d26                  @(iii)vmull_u8(ref_main_idx, dup_const_32_fract)
4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   lsl         lr,lr,#1
4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d21},[r12]                 @(iv)ref_main_idx_1
4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9,d17,d27                  @(iii)vmull_u8(ref_main_idx_1, dup_const_fract)
4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#0,#8
@ r0 <- row (ii) of the current block; r2 still addresses row (i) here
4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r0,r2,r3
4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d31,d4[4]                   @(v)
4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d14,q7,#5                   @(ii)shift_res = vrshrn_n_u16(add_res, 5)
4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(v)*pu1_ref[ref_main_idx]
4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#8,#8
4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ writeback moves r2 on by the 8 bytes just stored (next column block)
4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d10},[r2]!                 @(i)
4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d24,d1,d25                  @(iv)32-fract(dup_const_32_fract)
4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d29,d4[5]                   @(vi)
4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11,d20,d24                 @(iv)vmull_u8(ref_main_idx, dup_const_32_fract)
4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d27,d4[6]                   @(vii)
4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11,d21,d25                 @(iv)vmull_u8(ref_main_idx_1, dup_const_fract)
4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r8,r9                   @(vi)*pu1_ref[ref_main_idx]
4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#16,#8
4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ last fract lane of the current block; d4 is free to be overwritten after this
4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d25,d4[7]                   @(viii)
4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18,q9,#5                   @(iii)shift_res = vrshrn_n_u16(add_res, 5)
4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d8},[r10],r11              @(v)ref_main_idx
@ q2 aliases d4:d5, so this replaces the current fract bytes with the 16-bit
@ fract values of the NEXT block (narrowed back into d4 by vmovn further down)
4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vand        q2,q1,q3                    @dup_const_fract(fract = pos & (31))
4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d9},[r10]                  @(v)ref_main_idx_1
4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshrn.s16   d3,q1,#5                    @idx = pos >> 5
4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d14},[r0],r3               @(ii)
4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22,q11,#5                  @(iv)shift_res = vrshrn_n_u16(add_res, 5)
4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(vii)*pu1_ref[ref_main_idx]
4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#24,#8
4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d12},[r12],r11             @(vi)ref_main_idx
4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d30,d1,d31                  @(v)32-fract(dup_const_32_fract)
4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ double every idx byte (takes the place of the commented-out `lsl lr,lr,#1`)
4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshl.s8     d3,d3,#1
5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d13},[r12]                 @(vi)ref_main_idx_1
5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q5,d8,d30                   @(v)vmull_u8(ref_main_idx, dup_const_32_fract)
5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ lr <- offsets for rows (i)-(iv) of the NEXT block; the offset of row (viii)
@ of the current block is already in r9
5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.u32    lr,d3[0]                    @(i)extract idx to the r register
5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q5,d9,d31                   @(v)vmull_u8(ref_main_idx_1, dup_const_fract)
5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r8,r9                   @(viii)*pu1_ref[ref_main_idx]
@ r8 is only changed once all eight row addresses of this block are formed
5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movle       r8,r1                       @reload the source to pu1_src+2nt
5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d16},[r10],r11             @(vii)ref_main_idx
5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d28,d1,d29                  @(vi)32-fract(dup_const_32_fract)
5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d18},[r0],r3               @(iii)
5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q7,d12,d28                  @(vi)vmull_u8(ref_main_idx, dup_const_32_fract)
5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d17},[r10]                 @(vii)ref_main_idx_1
5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q7,d13,d29                  @(vi)vmull_u8(ref_main_idx_1, dup_const_fract)
5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d20},[r12],r11             @(viii)ref_main_idx
5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d10,q5,#5                   @(v)shift_res = vrshrn_n_u16(add_res, 5)
5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d21},[r12]                 @(viii)ref_main_idx_1
5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d26,d1,d27                  @(vii)32-fract(dup_const_32_fract)
5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addgt       r8,r8,#8                    @increment the source next set 8 columns in same row
@ r12 is free again (row (viii) loads are done): r12 = 8*r3 - r5, the amount
@ that moves r2 from the end of this row band to the start of the next one
5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lslle       r12,r3,#3
5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    suble       r12,r12,r5
5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d22},[r0],r3               @(iv)
5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9,d16,d26                  @(vii)vmull_u8(ref_main_idx, dup_const_32_fract)
5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d10},[r0],r3               @(v)
5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9,d17,d27                  @(vii)vmull_u8(ref_main_idx_1, dup_const_fract)
5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r2,r2,r12                   @increment the dst pointer to 8*dst_strd - nt
5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sbfx        r9,lr,#0,#8
5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ d4 <- eight fract bytes of the NEXT block
5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmovn.i16   d4,q2
@ row (vi) result stays in d14 until the next pass or the epilogue stores it
5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d14,q7,#5                   @(vi)shift_res = vrshrn_n_u16(add_res, 5)
5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   lsl         lr,lr,#1
5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ overall countdown; these flags drive the bne below
5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r7,r7,#8
5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r10,r8,r9                   @(i)*pu1_ref[ref_main_idx]
5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bne         kernel_8_rows
5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ epilogue: drains the software pipeline. It is entered by falling out of
@ kernel_8_rows, and is also the target of the `beq epilogue` ahead of the loop.
@ On entry:
@   d14      finished row (vi), not yet stored
@   q9       16-bit weighted sum of row (vii), not yet narrowed
@   d20/d21  the two reference loads of row (viii)
@   d25      fract of row (viii)
@   r0       address of row (vi), stepped by r3 after each store
@ No new reference data is loaded here; the remaining three rows are finished
@ and stored, then control goes to the common exit.
5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepilogue:
5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d14},[r0],r3               @(vi)
5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18,q9,#5                   @(vii)shift_res = vrshrn_n_u16(add_res, 5)
5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ row (viii): d1 - fract weight, multiply, accumulate (same steps as in the loop)
5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.u8     d24,d1,d25                  @(viii)32-fract(dup_const_32_fract)
5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11,d20,d24                 @(viii)vmull_u8(ref_main_idx, dup_const_32_fract)
5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11,d21,d25                 @(viii)vmull_u8(ref_main_idx_1, dup_const_fract)
5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d18},[r0],r3               @(vii)
5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22,q11,#5                  @(viii)shift_res = vrshrn_n_u16(add_res, 5)
5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      {d22},[r0],r3               @(viii)
5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_loops
5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ core_loop_4 has no body of its own and falls straight through to end_loops.
5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_4:
5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ end_loops: common exit. Drops 132 bytes of stack (presumably the scratch
@ area reserved at function entry - the matching `sub` is not visible in this
@ part of the file, TODO confirm the sizes agree), then pops r4-r12 and loads
@ the saved return address directly into pc (r15), which returns to the caller.
@ NOTE(review): the code above writes d8-d15, which AAPCS treats as
@ callee-saved, and nothing on this exit path restores them - confirm whether
@ the entry sequence saves them or whether callers tolerate the clobber.
5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_loops:
5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         sp, sp, #132
5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldmfd       sp!,{r4-r12,r15}            @reload the registers from sp
5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
572