10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/*****************************************************************************
20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Licensed under the Apache License, Version 2.0 (the "License");
60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* you may not use this file except in compliance with the License.
70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* You may obtain a copy of the License at:
80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* http://www.apache.org/licenses/LICENSE-2.0
100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Unless required by applicable law or agreed to in writing, software
120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* distributed under the License is distributed on an "AS IS" BASIS,
130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* See the License for the specific language governing permissions and
150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* limitations under the License.
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*****************************************************************************/
180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/**
190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @file
210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  ihevc_intra_pred_luma_mode_3_to_9.s
220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief
240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  contains function definitions for luma intra prediction, angular modes 3 to 9.
250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* functions are coded in arm neon assembly and can be compiled using
260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* rvct
280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @author
300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  parthiban v
310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par list of functions:
330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks
360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  none
370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/**
410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief
440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*    luma intra prediction for angular modes 3 to 9
450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par description:
470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] pu1_ref
490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  uword8 pointer to the source
500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[out] pu1_dst
520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  uword8 pointer to the destination
530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] src_strd
550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  integer source stride
560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] dst_strd
580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  integer destination stride
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] nt
610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  size of transform block
620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @param[in] mode
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  intra prediction mode (3 to 9); indexes gai4_ihevc_ang_table
650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @returns
670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks
690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  none
700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/
730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@void ihevc_intra_pred_luma_mode_3_to_9(uword8* pu1_ref,
750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               word32 src_strd,
760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               uword8* pu1_dst,
770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               word32 dst_strd,
780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               word32 nt,
790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                               word32 mode)
800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@
810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************variables vs registers*****************************************
820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r0 => *pu1_ref
830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r1 => src_strd
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r2 => *pu1_dst
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r3 => dst_strd
860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@stack contents from #40
880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   nt
890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@   mode
900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text
920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.align 4
930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_intra_pred_luma_mode_3_to_9_a9q
980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gai4_ihevc_ang_table
990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gai4_ihevc_inv_ang_table
1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern col_for_intra_luma
1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern idx_neg_idx_3_9
1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargai4_ihevc_ang_table_addr:                  @ literal pool of pc-relative offsets to external tables; one entry per use site because each offset is relative to its own ulbl* label
1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gai4_ihevc_ang_table - ulbl1 - 8      @ loaded into r7, then "add r7,r7,pc" at ulbl1 yields &gai4_ihevc_ang_table; the -8 offsets the pc read (assumes ARM state where pc reads as instruction address + 8 - confirm)
1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargai4_ihevc_inv_ang_table_addr:              @ offset to gai4_ihevc_inv_ang_table, loaded into r8 and added to pc at ulbl2
1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gai4_ihevc_inv_ang_table - ulbl2 - 8  @ NOTE(review): prologue_8_16_32 overwrites r8 (mov r8, r12) before reading it - confirm some path actually uses this table
1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakaridx_neg_idx_3_9_addr_1:                     @ offset to idx_neg_idx_3_9 ("least idx table"), loaded into r12 and added to pc at ulbl3_1
1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long idx_neg_idx_3_9 - ulbl3_1 - 8
1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakaridx_neg_idx_3_9_addr_2:                     @ second offset to idx_neg_idx_3_9, paired with label ulbl3_2
1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long idx_neg_idx_3_9 - ulbl3_2 - 8
1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcol_for_intra_luma_addr_1:                  @ offset to col_for_intra_luma, loaded into r14 and added to pc at ulbl4_1 (function entry)
1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long col_for_intra_luma - ulbl4_1 - 8
1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcol_for_intra_luma_addr_2:                  @ same table; conditionally reloaded into r14 (ldrle/addle) at ulbl4_2 when the column counter r11 runs out
1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long col_for_intra_luma - ulbl4_2 - 8
1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcol_for_intra_luma_addr_3:                  @ same table; conditionally reloaded into r14 (ldrle/addle) at ulbl4_3 inside kernel_8_16_32
1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long col_for_intra_luma - ulbl4_3 - 8
1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.type ihevc_intra_pred_luma_mode_3_to_9_a9q, %function
1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_intra_pred_luma_mode_3_to_9_a9q:
1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    stmfd       sp!, {r4-r12, r14}          @stack stores the values of the arguments
1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r4,[sp,#40]                 @loads nt
1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r7, gai4_ihevc_ang_table_addr
1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl1:
1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r7,r7,pc
1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r5,[sp,#44]                 @mode (3 to 9)
1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r8, gai4_ihevc_inv_ang_table_addr
1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl2:
1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r8,r8,pc
1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r7, r7, r5, lsl #2          @gai4_ihevc_ang_table[mode]
1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r7, [r7]                    @intra_pred_ang
1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d30, r7                     @intra_pred_ang
1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r14, col_for_intra_luma_addr_1
1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl4_1:
1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r14,r14,pc
1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         r4, #4
1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         sz_4_proc
1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           prologue_8_16_32
1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarprologue_8_16_32:
1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsr         r10, r4, #3
1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      d31, [r14]!
1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mul         r10, r4, r10                @block counter (dec by #8)
1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r11, r4                     @col counter to be inc/dec by #8
1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.s8    q11, d30, d31               @(col+1)*intra_pred_angle [0:7](col)
1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r7, r5, #3
1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d2,#1                       @contains #1 for adding to get ref_main_idx + 1
1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r12, idx_neg_idx_3_9_addr_1 @load least idx table
1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl3_1:
1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r12,pc
1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d3, #2
1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12, r12, r7, lsl #4
1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r8, r12
1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r7, #8
1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r7, r7, r3, lsl #3          @r7 = 8-8r3
1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r9, [r8]
1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r1, r0, r4, lsl #1          @pu1_ref + nt
1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmovn.s16   d6, q11
1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d26, r9                     @least idx added to final idx values
1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r1, r1, #9                  @ref_main_idx + 2nt - (8 + 1)(two_nt - idx - row ) for 8 & 8 - 1row
1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r6, r1, r9
1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d0,d1}, [r6]               @stores the 32 values reqd based on indices values (from least idx)
1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshr.s16    q11, q11, #5
1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d29, #31                    @contains #31 for vand operation
1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d28, #32
1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqmovn.s16  d8, q11
1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vand        d6, d6, d29                 @fract values in d1/ idx values in d0
1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r0, #1
1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d27, #7                     @row 0 to 7
1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d2                  @ref_main_idx (sub row)
1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d26, d8                 @ref_main_idx (row 0)
2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vadd.s8     d8, d8, d27                 @t0 compensate the pu1_src idx incremented by 8
2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d8, d2                  @ref_main_idx + 1 (row 0)
2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d12, {d0,d1}, d8            @load from ref_main_idx (row 0)
2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d7, d28, d6                 @32-fract
2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d13, {d0,d1}, d9            @load from ref_main_idx + 1 (row 0)
2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d8, d2                  @ref_main_idx (row 1)
2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d9, d2                  @ref_main_idx + 1 (row 1)
2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d16, {d0,d1}, d4            @load from ref_main_idx (row 1)
2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q12, d12, d7                @mul (row 0)
2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q12, d13, d6                @mul (row 0)
2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d17, {d0,d1}, d5            @load from ref_main_idx + 1 (row 1)
2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d3                  @ref_main_idx (row 2)
2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d9, d3                  @ref_main_idx + 1 (row 2)
2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d24, q12, #5                @round shft (row 0)
2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d14, {d0,d1}, d8            @load from ref_main_idx (row 2)
2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11, d16, d7                @mul (row 1)
2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11, d17, d6                @mul (row 1)
2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d15, {d0,d1}, d9            @load from ref_main_idx + 1 (row 2)
2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d4, d3                  @ref_main_idx (row 3)
2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d5, d3                  @ref_main_idx + 1 (row 3)
2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d24, [r2], r3               @st (row 0)
2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22, q11, #5                @round shft (row 1)
2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d10, {d0,d1}, d4            @load from ref_main_idx (row 3)
2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q10, d14, d7                @mul (row 2)
2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q10, d15, d6                @mul (row 2)
2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d11, {d0,d1}, d5            @load from ref_main_idx + 1 (row 3)
2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d3                  @ref_main_idx (row 4)
2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d9, d3                  @ref_main_idx + 1 (row 4)
2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d22, [r2], r3               @st (row 1)
2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d20, q10, #5                @round shft (row 2)
2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d12, {d0,d1}, d8            @load from ref_main_idx (row 4)
2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9, d10, d7                 @mul (row 3)
2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9, d11, d6                 @mul (row 3)
2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d13, {d0,d1}, d9            @load from ref_main_idx + 1 (row 4)
2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d4, d3                  @ref_main_idx (row 5)
2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d5, d3                  @ref_main_idx + 1 (row 5)
2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d20, [r2], r3               @st (row 2)
2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18, q9, #5                 @round shft (row 3)
2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d16, {d0,d1}, d4            @load from ref_main_idx (row 5)
2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q12, d12, d7                @mul (row 4)
2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q12, d13, d6                @mul (row 4)
2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d17, {d0,d1}, d5            @load from ref_main_idx + 1 (row 5)
2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d3                  @ref_main_idx (row 6)
2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d9, d3                  @ref_main_idx + 1 (row 6)
2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d18, [r2], r3               @st (row 3)
2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d24, q12, #5                @round shft (row 4)
2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d14, {d0,d1}, d8            @load from ref_main_idx (row 6)
2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11, d16, d7                @mul (row 5)
2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11, d17, d6                @mul (row 5)
2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d15, {d0,d1}, d9            @load from ref_main_idx + 1 (row 6)
2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d4, d3                  @ref_main_idx (row 7)
2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d5, d3                  @ref_main_idx + 1 (row 7)
2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d24, [r2], r3               @st (row 4)
2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22, q11, #5                @round shft (row 5)
2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d10, {d0,d1}, d4            @load from ref_main_idx (row 7)
2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q10, d14, d7                @mul (row 6)
2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q10, d15, d6                @mul (row 6)
2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d11, {d0,d1}, d5            @load from ref_main_idx + 1 (row 7)
2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9, d10, d7                 @mul (row 7)
2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9, d11, d6                 @mul (row 7)
2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d22, [r2], r3               @st (row 5)
2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d20, q10, #5                @round shft (row 6)
2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18, q9, #5                 @round shft (row 7)
2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d20, [r2], r3               @st (row 6)
2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r10, r10, #8                @subtract 8 and go to end if 8x8
2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d18, [r2], r3               @st (row 7)
2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         end_func
2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r11, r11, #8
2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addgt       r8, r8, #4
2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addgt       r2, r2, r7
2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movle       r8, r12
2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    suble       r2, r2, r4
2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r2, r2, #8
3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movle       r11, r4
3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldrle       r14, col_for_intra_luma_addr_2
3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl4_2:
3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r14,r14,pc
3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r0, r0, #8
3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r5,r2
3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      d31, [r14]!
3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.s8    q6, d30, d31                @(col+1)*intra_pred_angle [0:7](col)
3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmovn.s16   d10, q6
3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshr.s16    q6, q6, #5
3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqmovn.s16  d11, q6
3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r9, [r8]
3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r9, r0, r9
3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r9, r9, #1
3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d26, r9
3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d16,#8
3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r4,r4,#8
3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarkernel_8_16_32:
3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d26, d11                @ref_main_idx
3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov        d26,d10
3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r11, r11, #8
3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r6, r1, r9
3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d10, {d0,d1}, d4            @load from ref_main_idx (row 7)
3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vadd.s8     d8, d8, d16                 @to compensate the pu1_src idx incremented by 8
3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q10, d14, d7                @mul (row 6)
3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d11, {d0,d1}, d5            @load from ref_main_idx - 1 (row 7)
3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q10, d15, d6                @mul (row 6)
3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d8, d2                  @ref_main_idx - 1
3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r0, r0, #8
3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addgt       r8, r8, #4
3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d0,d1}, [r6]               @stores the 32 values reqd based on indices values (from least idx)
3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d24, [r5], r3               @st (row 4)
3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22, q11, #5                @round shft (row 5)
3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldrle       r14, col_for_intra_luma_addr_3
3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl4_3:
3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r14,r14,pc
3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movle       r8, r12
3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d27, r0                     @row value inc or reset accordingly
3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d8, d2                  @ref_main_idx (row 1)
3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d12, {d0,d1}, d8            @load from ref_main_idx (row 0)
3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d9, d2                  @ref_main_idx - 1 (row 1)
3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9, d10, d7                 @mul (row 7)
3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d13, {d0,d1}, d9            @load from ref_main_idx + 1 (row 0)
3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9, d11, d6                 @mul (row 7)
3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      d31, [r14]!
3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vand        d6, d29, d26                @fract values in d1/ idx values in d0
3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d22, [r5], r3               @(from previous loop)st (row 5)
3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d20, q10, #5                @(from previous loop)round shft (row 6)
3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d3                  @ref_main_idx (row 2)
3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d10, {d0,d1}, d4            @load from ref_main_idx (row 1)
3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d9, d3                  @ref_main_idx - 1 (row 2)
3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addle       r11, r4, #8
3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r9, [r8]
3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d7, d28, d6                 @32-fract
3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q12, d12, d7                @mul (row 0)
3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d17, {d0,d1}, d5            @load from ref_main_idx + 1 (row 1)
3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q12, d13, d6                @mul (row 0)
3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d20, [r5], r3               @(from previous loop)st (row 6)
3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18, q9, #5                 @(from previous loop)round shft (row 7)
3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d4, d3                  @ref_main_idx (row 3)
3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d14, {d0,d1}, d8            @load from ref_main_idx (row 2)
3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d5, d3                  @ref_main_idx - 1 (row 3)
3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11, d10, d7                @mul (row 1)
3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d15, {d0,d1}, d9            @load from ref_main_idx + 1 (row 2)
3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11, d17, d6                @mul (row 1)
3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d24, q12, #5                @round shft (row 0)
3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d18, [r5], r3               @(from previous loop)st (row 7)
3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d3                  @ref_main_idx (row 4)
3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d10, {d0,d1}, d4            @load from ref_main_idx (row 3)
3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d9, d3                  @ref_main_idx - 1 (row 4)
3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q10, d14, d7                @mul (row 2)
3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d11, {d0,d1}, d5            @load from ref_main_idx + 1 (row 3)
3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q10, d15, d6                @mul (row 2)
3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.s8    q7, d30, d31                @(col+1)*intra_pred_angle [0:7](col)
3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r5,r2,r3,lsl#2
4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r9, r0, r9
4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d24, [r2], r3               @st (row 0)
4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22, q11, #5                @round shft (row 1)
4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d4, d3                  @ref_main_idx (row 5)
4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d12, {d0,d1}, d8            @load from ref_main_idx (row 4)
4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d5, d3                  @ref_main_idx - 1 (row 5)
4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9, d10, d7                 @mul (row 3)
4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d13, {d0,d1}, d9            @load from ref_main_idx + 1 (row 4)
4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9, d11, d6                 @mul (row 3)
4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d22, [r2], r3               @st (row 1)
4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d20, q10, #5                @round shft (row 2)
4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmovn.s16   d10, q7
4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshr.s16    q7, q7, #5
4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d3                  @ref_main_idx (row 6)
4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d21, {d0,d1}, d4            @load from ref_main_idx (row 5)
4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d9, d3                  @ref_main_idx - 1 (row 6)
4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q12, d12, d7                @mul (row 4)
4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d17, {d0,d1}, d5            @load from ref_main_idx + 1 (row 5)
4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q12, d13, d6                @mul (row 4)
4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d20, [r2], r3               @st (row 2)
4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18, q9, #5                 @round shft (row 3)
4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r9, r9, #1
4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqmovn.s16  d11, q7
4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d4, d3                  @ref_main_idx (row 7)
4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d14, {d0,d1}, d8            @load from ref_main_idx (row 6)
4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d5, d3                  @ref_main_idx - 1 (row 7)
4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11, d21, d7                @mul (row 5)
4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d15, {d0,d1}, d9            @load from ref_main_idx + 1 (row 6)
4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11, d17, d6                @mul (row 5)
4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vadd.s8     d11, d27, d11               @ref_main_idx (add row)
4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d26, r9
4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d18, [r2], r3               @st (row 3)
4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d24, q12, #5                @round shft (row 4)
4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r2,r3, lsl #2
4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d11, d11, d2                @ref_main_idx -1 (sub 1)
4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    addgt       r2, r7, r2
4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    suble       r2, r2, r4
4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        r10, r10, #8                @subtract 8 and go to end if 8x8
4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bne         kernel_8_16_32
4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ epil_8_16_32: drain of the software-pipelined 8-row kernel. Reached by
@ fall-through when the "bne kernel_8_16_32" above is not taken (r10 == 0).
@ Rows 4-7 of the last tile are still in flight on entry:
@   row 4 - already rounded into d24, only the store is pending
@   row 5 - 16-bit sums are in q11, needs round + store
@   row 6 - source bytes are in d14/d15, needs multiply, round, store
@   row 7 - table indices are in d4/d5, needs lookup, multiply, round, store
@ Each row is  (a * d7 + b * d6 + 16) >> 5  narrowed to 8 bits; per the loop
@ body d6 holds fract and d7 holds (d28 - fract), commented there as 32-fract.
@ Every store writes 8 bytes at r5 and then advances r5 by r3.
4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepil_8_16_32:
4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d10, {d0,d1}, d4            @load from ref_main_idx (row 7)
4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q10, d14, d7                @mul (row 6): q10 = d14 * d7
4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d11, {d0,d1}, d5            @load from ref_main_idx - 1 (row 7); d5 = d4 - d2
4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q10, d15, d6                @mul (row 6): q10 += d15 * d6
4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d24, [r5], r3               @st (row 4)
4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d24, q11, #5                @round shift (row 5); d24 is free again after the store above
4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9, d10, d7                 @mul (row 7): q9 = d10 * d7
4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9, d11, d6                 @mul (row 7): q9 += d11 * d6
4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d24, [r5], r3               @st (row 5)
4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d20, q10, #5                @round shift (row 6)
4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d20, [r5], r3               @st (row 6)
4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18, q9, #5                 @round shift (row 7)
4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.8      d18, [r5], r3               @st (row 7)
4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_func                    @skip the 4x4 path and go to the common exit
4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ sz_4_proc: straight-line path that writes four output rows of four bytes
@ each (lane 0 of a 32-bit store per row) at r2, advancing r2 by r3 per row,
@ then falls through to end_func.
@ Per output byte:  (tbl[i] * (32 - fract) + tbl[i - 1] * fract + 16) >> 5
@ where tbl is a 16-byte vtbl table held in d0:d1 and i steps down by one
@ for every row. All eight lanes are computed; only the low four are stored.
@ Register state on entry is set up before this label and is not visible here.
@ From the existing comments it is presumably: r0 = pu1_ref, r2 = dst pointer,
@ r3 = dst stride, r4 = nt, r5 = mode, r14 -> column-number table,
@ d30 = intra_pred_angle in every lane -- TODO confirm against the prologue.
4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarsz_4_proc:
4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      d31, [r14]                  @8 bytes from the table r14 points to
4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d2, #1                      @contains #1, subtracted to get ref_main_idx - 1
4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d3, #2                      @contains #2, subtracted to step an index two rows on
4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r12, idx_neg_idx_3_9_addr_2 @load least idx table (pc-relative value, made absolute below)
4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl3_2:
4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12,r12,pc                  @r12 = loaded value + pc; the literal itself is defined outside this section
4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.s8    q11, d30, d31               @(col+1)*intra_pred_angle [0:7](col)
4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r7, r5, #3                  @r7 = r5 - 3 (presumably mode - 3 -- TODO confirm r5 holds the mode)
4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r12, r12, r7, lsl #4        @select table entry: 16 bytes per step of r7
4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         r8, r12
4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         r9, [r8]                    @r9 = first word of the selected entry (the least idx)
4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vdup.8      d26, r9                     @low byte of least idx replicated into every lane
4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r6, r0, r4, lsl #1          @pu1_ref + 2nt
4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmovn.s16   d6, q11                     @low byte of each 16-bit product (masked to fract below)
5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r6, r6, #9                  @pu1_ref + 2nt - (8 + 1)
5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         r6, r6, r9                  @table base = pu1_ref + 2nt - 9 - least idx
5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vld1.8      {d0,d1}, [r6]               @loads the 16 reference bytes used as the vtbl table (from least idx)
5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d29, #31                    @contains #31 for vand operation
5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d28, #32                    @contains #32 for the 32-fract weight
5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vshr.s16    q11, q11, #5                @idx = product >> 5 (arithmetic shift)
5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vqmovn.s16  d8, q11                     @idx narrowed to 8 bits with saturation
5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vand        d6, d6, d29                 @fract = product & 31 (fract in d6, idx in d8)
5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d7, d28, d6                 @32-fract
5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmov.i8     d27, #7                     @contains #7, added below to rebase the index onto the table
5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d2                  @idx - 1
5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d26, d8                 @least idx - (idx - 1)
5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vadd.s8     d8, d8, d27                 @+7: table index = least idx - idx + 8, i.e. byte at pu1_ref + 2nt - 1 - idx (if least idx fits in a byte)
5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d8, d2                  @ref_main_idx - 1
5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d8, d2                  @row 1 ref_main_idx
5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d9, d2                  @row 1 ref_main_idx - 1
5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d12, {d0,d1}, d8            @load from ref_main_idx (row 0)
5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d13, {d0,d1}, d9            @load from ref_main_idx - 1 (row 0)
5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q12, d12, d7                @mul (row 0)
5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d16, {d0,d1}, d4            @load from ref_main_idx (row 1)
5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q12, d13, d6                @mul (row 0)
5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d8, d8, d3                  @ref_main_idx (row 2)
5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d17, {d0,d1}, d5            @load from ref_main_idx - 1 (row 1)
5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d9, d9, d3                  @ref_main_idx - 1 (row 2)
5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q11, d16, d7                @mul (row 1); q11 is reused, idx/fract were already extracted from it
5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d12, {d0,d1}, d8            @load from ref_main_idx (row 2)
5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q11, d17, d6                @mul (row 1)
5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d24, q12, #5                @round shift (row 0)
5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d4, d4, d3                  @ref_main_idx (row 3)
5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d13, {d0,d1}, d9            @load from ref_main_idx - 1 (row 2)
5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vsub.s8     d5, d5, d3                  @ref_main_idx - 1 (row 3)
5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q10, d12, d7                @mul (row 2)
5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d16, {d0,d1}, d4            @load from ref_main_idx (row 3)
5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q10, d13, d6                @mul (row 2)
5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.32     d24[0], [r2], r3            @st row 0 (low 4 bytes only)
5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d22, q11, #5                @round shift (row 1)
5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vtbl.8      d17, {d0,d1}, d5            @load from ref_main_idx - 1 (row 3)
5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmull.u8    q9, d16, d7                 @mul (row 3)
5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vmlal.u8    q9, d17, d6                 @mul (row 3)
5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.32     d22[0], [r2], r3            @st row 1
5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d20, q10, #5                @round shift (row 2)
5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.32     d20[0], [r2], r3            @st row 2
5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vrshrn.i16  d18, q9, #5                 @round shift (row 3)
5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    vst1.32     d18[0], [r2], r3            @st (row 3)
5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ end_func: common exit, reached by branch from the 8x8-tile epilogue and by
@ fall-through from the 4x4 path. Pops r4-r12 and loads the next stacked word
@ into pc (r15), which returns to the caller. This presumably mirrors a
@ prologue push of {r4-r12, lr}, which is outside this section -- TODO confirm.
@ NOTE(review): the code above writes d8-d15, which AAPCS treats as
@ callee-saved, and no NEON restore is visible here; confirm that the
@ prologue/epilogue pair or the callers account for that.
5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_func:
5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldmfd       sp!,{r4-r12,r15}            @restore r4-r12 from sp and return by popping into pc
5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
574