10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///*****************************************************************************
20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Licensed under the Apache License, Version 2.0 (the "License");
60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* you may not use this file except in compliance with the License.
70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* You may obtain a copy of the License at:
80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* http://www.apache.org/licenses/LICENSE-2.0
100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Unless required by applicable law or agreed to in writing, software
120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* distributed under the License is distributed on an "AS IS" BASIS,
130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* See the License for the specific language governing permissions and
150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* limitations under the License.
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*****************************************************************************/
180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///**
190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*******************************************************************************
200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @file
210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  ihevc_intra_pred_luma_mode_3_to_9.s
220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @brief
240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  contains the function definition for luma intra prediction, angular
250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  modes 3 to 9. the function is coded in armv8 (aarch64) neon assembly
260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  using gnu assembler syntax
280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @author
300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  parthiban v
310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @par list of functions:
330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @remarks
360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  none
370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*******************************************************************************
390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*/
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///**
410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*******************************************************************************
420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @brief
440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*    luma intra prediction for angular modes 3 to 9
450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @par description:
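470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*    each output sample is a rounded 2-tap blend of two adjacent reference samples a and b: ((32 - fract) * a + fract * b + 16) >> 5, where idx and fract come from (col + 1) * intra_pred_ang (idx = product >> 5, fract = product & 31)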
480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] pu1_ref
490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  uword8 pointer to the source
500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[out] pu1_dst
520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  uword8 pointer to the destination
530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] src_strd
550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  integer source stride
560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] dst_strd
580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  integer destination stride
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] nt
610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  size of transform block
620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] mode
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  intra prediction mode (3 to 9); selects the angle from gai4_ihevc_ang_table
650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @returns
670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @remarks
690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  none
700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*******************************************************************************
720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*/
730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//void ihevc_intra_pred_luma_mode_3_to_9(uword8* pu1_ref,
750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                               word32 src_strd,
760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                               uword8* pu1_dst,
770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                               word32 dst_strd,
780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                               word32 nt,
790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                               word32 mode)
800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//
810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//**************variables vs registers*****************************************
820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x0 => *pu1_ref
830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x1 => src_strd
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x2 => *pu1_dst
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x3 => dst_strd
860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x4 => nt
880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x5 => mode
890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//(nt and mode are read from x4/x5 in this aarch64 version, not from the stack)
900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text
920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.align 4
930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.include "ihevc_neon_macros.s"
940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_intra_pred_luma_mode_3_to_9_av8
980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gai4_ihevc_ang_table
990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gai4_ihevc_inv_ang_table
1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern col_for_intra_luma
1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern idx_neg_idx_3_9
1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.type ihevc_intra_pred_luma_mode_3_to_9_av8, %function
1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_intra_pred_luma_mode_3_to_9_av8:
1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    // stmfd sp!, {x4-x12, x14}        //stack stores the values of the arguments
1099cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy
1109cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    stp         d12,d13,[sp,#-16]!
1119cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    stp         d14,d15,[sp,#-16]!
1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    stp         x19, x20,[sp,#-16]!
1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    adrp        x7,  :got:gai4_ihevc_ang_table
1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         x7,  [x7, #:got_lo12:gai4_ihevc_ang_table]
1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    adrp        x8,  :got:gai4_ihevc_inv_ang_table
1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         x8,  [x8, #:got_lo12:gai4_ihevc_inv_ang_table]
1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x7, x7, x5, lsl #2          //gai4_ihevc_ang_table[mode]
1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         w7,  [x7]                   //intra_pred_ang
1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sxtw        x7,w7
1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v30.8b,w7                   //intra_pred_ang
1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    adrp        x14,  :got:col_for_intra_luma
1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         x14,  [x14, #:got_lo12:col_for_intra_luma]
1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         x4, #4
1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         sz_4_proc
1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           prologue_8_16_32
1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarprologue_8_16_32:
1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsr         x10, x4, #3
1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v31.8b},[x14],#8
1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mul         x10, x4, x10                //block counter (dec by #8)
1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         x11, x4                     //col counter to be inc/dec by #8
1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    smull       v22.8h, v30.8b, v31.8b      //(col+1)*intra_pred_angle [0:7](col)
1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x7, x5, #3
1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v2.8b, #1                   //contains #1 for adding to get ref_main_idx + 1
1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    adrp        x12, :got:idx_neg_idx_3_9   //load least idx table
1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         x12, [x12, #:got_lo12:idx_neg_idx_3_9]
1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v3.8b, #2
1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x12, x12, x7, lsl #4
1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         x8, x12
1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         x7, #8
1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x7, x7, x3, lsl #3          //x7 = 8-8x3
1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         w9,  [x8]
1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sxtw        x9,w9
1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x1, x0, x4, lsl #1          //pu1_ref + nt
1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    xtn         v6.8b,  v22.8h
1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v26.8b,w9                   //least idx added to final idx values
1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x1, x1, #9                  //ref_main_idx + 2nt - (8 + 1)(two_nt - idx - row ) for 8 & 8 - 1row
1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x6, x1, x9
1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v0.16b}, [x6]              //stores the 32 values reqd based on indices values (from least idx)
1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sshr        v22.8h, v22.8h,#5
1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v29.8b, #31                 //contains #31 for vand operation
1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v28.8b, #32
1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1709cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sqxtn       v1.8b,  v22.8h
1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    and         v6.8b,  v6.8b ,  v29.8b     //fract values in d1/ idx values in d0
1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         x0, #1
1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v27.8b, #7                  //row 0 to 7
1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1789cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v2.8b      //ref_main_idx (sub row)
1799cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v26.8b ,  v1.8b     //ref_main_idx (row 0)
1809cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    add         v1.8b,  v1.8b ,  v27.8b     //t0 compensate the pu1_src idx incremented by 8
1819cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v1.8b ,  v2.8b     //ref_main_idx + 1 (row 0)
1829cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v12.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 0)
1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v7.8b,  v28.8b ,  v6.8b     //32-fract
1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1859cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v13.8b, {v0.16b},v19.8b     //load from ref_main_idx + 1 (row 0)
1869cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v4.8b,  v1.8b ,  v2.8b      //ref_main_idx (row 1)
1879cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v5.8b,  v19.8b ,  v2.8b     //ref_main_idx + 1 (row 1)
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v16.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 1)
1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v24.8h, v12.8b, v7.8b       //mul (row 0)
1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v24.8h, v13.8b, v6.8b       //mul (row 0)
1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v17.8b, {v0.16b},v5.8b      //load from ref_main_idx + 1 (row 1)
1949cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v3.8b      //ref_main_idx (row 2)
1959cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v19.8b ,  v3.8b    //ref_main_idx + 1 (row 2)
1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v24.8b, v24.8h,#5           //round shft (row 0)
1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1999cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v14.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 2)
2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v22.8h, v16.8b, v7.8b       //mul (row 1)
2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v22.8h, v17.8b, v6.8b       //mul (row 1)
2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2039cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v15.8b, {v0.16b},v19.8b     //load from ref_main_idx + 1 (row 2)
2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v4.8b,  v4.8b ,  v3.8b      //ref_main_idx (row 3)
2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v5.8b,  v5.8b ,  v3.8b      //ref_main_idx + 1 (row 3)
2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v24.8b},[x2], x3           //st (row 0)
2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v22.8b, v22.8h,#5           //round shft (row 1)
2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2109cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v23.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 3)
2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v20.8h, v14.8b, v7.8b       //mul (row 2)
2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v20.8h, v15.8b, v6.8b       //mul (row 2)
2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2149cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v25.8b, {v0.16b},v5.8b      //load from ref_main_idx + 1 (row 3)
2159cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v3.8b      //ref_main_idx (row 4)
2169cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v19.8b ,  v3.8b    //ref_main_idx + 1 (row 4)
2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.8b},[x2], x3           //st (row 1)
2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v20.8b, v20.8h,#5           //round shft (row 2)
2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2219cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v12.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 4)
2229cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umull       v18.8h, v23.8b, v7.8b       //mul (row 3)
2239cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umlal       v18.8h, v25.8b, v6.8b       //mul (row 3)
2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2259cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v13.8b, {v0.16b},v19.8b     //load from ref_main_idx + 1 (row 4)
2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v4.8b,  v4.8b ,  v3.8b      //ref_main_idx (row 5)
2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v5.8b,  v5.8b ,  v3.8b      //ref_main_idx + 1 (row 5)
2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v20.8b},[x2], x3           //st (row 2)
2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v18.8b, v18.8h,#5           //round shft (row 3)
2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v16.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 5)
2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v24.8h, v12.8b, v7.8b       //mul (row 4)
2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v24.8h, v13.8b, v6.8b       //mul (row 4)
2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v17.8b, {v0.16b},v5.8b      //load from ref_main_idx + 1 (row 5)
2379cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v3.8b      //ref_main_idx (row 6)
2389cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v19.8b ,  v3.8b    //ref_main_idx + 1 (row 6)
2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v18.8b},[x2], x3           //st (row 3)
2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v24.8b, v24.8h,#5           //round shft (row 4)
2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2439cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v14.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 6)
2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v22.8h, v16.8b, v7.8b       //mul (row 5)
2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v22.8h, v17.8b, v6.8b       //mul (row 5)
2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2479cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v15.8b, {v0.16b},v19.8b     //load from ref_main_idx + 1 (row 6)
2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v4.8b,  v4.8b ,  v3.8b      //ref_main_idx (row 7)
2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v5.8b,  v5.8b ,  v3.8b      //ref_main_idx + 1 (row 7)
2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v24.8b},[x2], x3           //st (row 4)
2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v22.8b, v22.8h,#5           //round shft (row 5)
2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2549cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v23.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 7)
2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v20.8h, v14.8b, v7.8b       //mul (row 6)
2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v20.8h, v15.8b, v6.8b       //mul (row 6)
2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2589cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v25.8b, {v0.16b},v5.8b      //load from ref_main_idx + 1 (row 7)
2599cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umull       v18.8h, v23.8b, v7.8b       //mul (row 7)
2609cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umlal       v18.8h, v25.8b, v6.8b       //mul (row 7)
2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.8b},[x2], x3           //st (row 5)
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v20.8b, v20.8h,#5           //round shft (row 6)
2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v18.8b, v18.8h,#5           //round shft (row 7)
2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v20.8b},[x2], x3           //st (row 6)
2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        x10, x10, #8                //subtract 8 and go to end if 8x8
2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v18.8b},[x2], x3           //st (row 7)
2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         end_func
2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        x11, x11, #8
2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x20, x8, #4
2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x8, x20, x8,gt
2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x20, x2, x7
2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x2, x20, x2,gt
2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x8, x12, x8,le
2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x20, x2, x4
2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x2, x20, x2,le
2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x20, x2, #8
2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x2, x20, x2,le
2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x11, x4, x11,le
2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         lbl284
2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    adrp        x14,  :got:col_for_intra_luma
2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         x14,  [x14, #:got_lo12:col_for_intra_luma]
2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarlbl284:
2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x20, x0, #8
2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x0, x20, x0,le
2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         x5,x2
2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v31.8b},[x14],#8
2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    smull       v12.8h, v30.8b, v31.8b      //(col+1)*intra_pred_angle [0:7](col)
2959cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    xtn         v23.8b,  v12.8h
2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sshr        v12.8h, v12.8h,#5
2979cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sqxtn       v25.8b,  v12.8h
2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         w9,  [x8]
2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sxtw        x9,w9
3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x9, x0, x9
3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x9, x9, #1
3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v26.8b,w9
3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v16.8b, #8
3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x4,x4,#8
3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarkernel_8_16_32:
3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3099cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v26.8b ,  v25.8b    //ref_main_idx
3109cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    mov         v26.8b, v23.8b
3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        x11, x11, #8
3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x6, x1, x9
3149cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v23.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 7)
3159cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    add         v1.8b,  v1.8b ,  v16.8b     //to compensate the pu1_src idx incremented by 8
3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v20.8h, v14.8b, v7.8b       //mul (row 6)
3189cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v25.8b, {v0.16b},v5.8b      //load from ref_main_idx - 1 (row 7)
3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v20.8h, v15.8b, v6.8b       //mul (row 6)
3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3219cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v1.8b ,  v2.8b     //ref_main_idx - 1
3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x20, x0, #8
3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x0, x20, x0,le
3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x20, x8, #4
3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x8, x20, x8,gt
3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v0.16b}, [x6]              //stores the 32 values reqd based on indices values (from least idx)
3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v24.8b},[x5], x3           //st (row 4)
3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v22.8b, v22.8h,#5           //round shft (row 5)
3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         lbl323
3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    adrp        x14,  :got:col_for_intra_luma
3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         x14,  [x14, #:got_lo12:col_for_intra_luma]
3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarlbl323:
3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x8, x12, x8,le
3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v27.8b,w0                   //row value inc or reset accordingly
3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3389cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v4.8b,  v1.8b ,  v2.8b      //ref_main_idx (row 1)
3399cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v12.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 0)
3409cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v5.8b,  v19.8b ,  v2.8b     //ref_main_idx - 1 (row 1)
3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3439cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umull       v18.8h, v23.8b, v7.8b       //mul (row 7)
3449cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v13.8b, {v0.16b},v19.8b     //load from ref_main_idx + 1 (row 0)
3459cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umlal       v18.8h, v25.8b, v6.8b       //mul (row 7)
3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v31.8b},[x14],#8
3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    and         v6.8b,  v29.8b ,  v26.8b    //fract values in d1/ idx values in d0
3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.8b},[x5], x3           //(from previous loop)st (row 5)
3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v20.8b, v20.8h,#5           //(from previous loop)round shft (row 6)
3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3539cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v3.8b      //ref_main_idx (row 2)
3549cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v23.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 1)
3559cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v19.8b ,  v3.8b    //ref_main_idx - 1 (row 2)
3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x20, x4, #8
3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x11, x20, x11,le
3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         w9,  [x8]
3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sxtw        x9,w9
3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v7.8b,  v28.8b ,  v6.8b     //32-fract
3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v24.8h, v12.8b, v7.8b       //mul (row 0)
3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v17.8b, {v0.16b},v5.8b      //load from ref_main_idx + 1 (row 1)
3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v24.8h, v13.8b, v6.8b       //mul (row 0)
3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v20.8b},[x5], x3           //(from previous loop)st (row 6)
3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v18.8b, v18.8h,#5           //(from previous loop)round shft (row 7)
3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v4.8b,  v4.8b ,  v3.8b      //ref_main_idx (row 3)
3719cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v14.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 2)
3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v5.8b,  v5.8b ,  v3.8b      //ref_main_idx - 1 (row 3)
3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3749cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umull       v22.8h, v23.8b, v7.8b       //mul (row 1)
3759cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v15.8b, {v0.16b},v19.8b     //load from ref_main_idx + 1 (row 2)
3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v22.8h, v17.8b, v6.8b       //mul (row 1)
3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v24.8b, v24.8h,#5           //round shft (row 0)
3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v18.8b},[x5], x3           //(from previous loop)st (row 7)
3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3819cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v3.8b      //ref_main_idx (row 4)
3829cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v23.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 3)
3839cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v19.8b ,  v3.8b    //ref_main_idx - 1 (row 4)
3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v20.8h, v14.8b, v7.8b       //mul (row 2)
3869cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v25.8b, {v0.16b},v5.8b      //load from ref_main_idx + 1 (row 3)
3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v20.8h, v15.8b, v6.8b       //mul (row 2)
3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    smull       v14.8h, v30.8b, v31.8b      //(col+1)*intra_pred_angle [0:7](col)
3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x5,x2,x3,lsl#2
3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x9, x0, x9
3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v24.8b},[x2], x3           //st (row 0)
3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v22.8b, v22.8h,#5           //round shft (row 1)
3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v4.8b,  v4.8b ,  v3.8b      //ref_main_idx (row 5)
3979cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v12.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 4)
3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v5.8b,  v5.8b ,  v3.8b      //ref_main_idx - 1 (row 5)
3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4009cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umull       v18.8h, v23.8b, v7.8b       //mul (row 3)
4019cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v13.8b, {v0.16b},v19.8b     //load from ref_main_idx + 1 (row 4)
4029cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umlal       v18.8h, v25.8b, v6.8b       //mul (row 3)
4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.8b},[x2], x3           //st (row 1)
4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v20.8b, v20.8h,#5           //round shft (row 2)
4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4079cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    xtn         v23.8b,  v14.8h
4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sshr        v14.8h, v14.8h,#5
4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4109cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v3.8b      //ref_main_idx (row 6)
4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v21.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 5)
4129cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v19.8b ,  v3.8b    //ref_main_idx - 1 (row 6)
4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v24.8h, v12.8b, v7.8b       //mul (row 4)
4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v17.8b, {v0.16b},v5.8b      //load from ref_main_idx + 1 (row 5)
4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v24.8h, v13.8b, v6.8b       //mul (row 4)
4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v20.8b},[x2], x3           //st (row 2)
4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v18.8b, v18.8h,#5           //round shft (row 3)
4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x9, x9, #1
4229cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sqxtn       v25.8b,  v14.8h
4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v4.8b,  v4.8b ,  v3.8b      //ref_main_idx (row 7)
4259cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v14.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 6)
4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v5.8b,  v5.8b ,  v3.8b      //ref_main_idx - 1 (row 7)
4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v22.8h, v21.8b, v7.8b       //mul (row 5)
4299cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v15.8b, {v0.16b},v19.8b     //load from ref_main_idx + 1 (row 6)
4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v22.8h, v17.8b, v6.8b       //mul (row 5)
4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4329cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    add         v25.8b,  v27.8b ,  v25.8b   //ref_main_idx (add row)
4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v26.8b,w9
4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v18.8b},[x2], x3           //st (row 3)
4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v24.8b, v24.8h,#5           //round shft (row 4)
4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x2, x2, x3, lsl #2
4399cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v25.8b,  v25.8b ,  v2.8b    //ref_main_idx -1 (sub 1)
4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x20, x7, x2
4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x2, x20, x2,gt
4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x20, x2, x4
4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    csel        x2, x20, x2,le
4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        x10, x10, #8                //subtract 8 and go to end if 8x8
4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bne         kernel_8_16_32
4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
// epil_8_16_32: drains the software pipeline of the 8/16/32 path.
// Reached by fall-through when the kernel_8_16_32 loop counter x10 hits zero
// (any other branches to this label are outside this view).
// State expected on entry, as left by the kernel: row 4 already rounded in
// v24, row 5 sum in v22, row 6 taps in v14/v15, row 7 tbl indices in v4/v5,
// blend weights in v7/v6, and x5 pointing at the destination of row 4.
// Rows 4-7 are finished and stored through x5 (stride x3), then control
// jumps to end_func.
4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarepil_8_16_32:
4519cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v23.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 7)
4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v20.8h, v14.8b, v7.8b       //mul (row 6): first tap * v7
4549cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v25.8b, {v0.16b},v5.8b      //load from ref_main_idx - 1 (row 7)
4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v20.8h, v15.8b, v6.8b       //mla (row 6): accumulate second tap * v6
4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v24.8b},[x5], x3           //st (row 4)
4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v24.8b, v22.8h,#5           //round shift (row 5), v24 is reused now that row 4 is stored
4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4609cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umull       v18.8h, v23.8b, v7.8b       //mul (row 7): first tap * v7
4619cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    umlal       v18.8h, v25.8b, v6.8b       //mla (row 7): accumulate second tap * v6
4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v24.8b},[x5], x3           //st (row 5), held in v24 here
4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v20.8b, v20.8h,#5           //round shift (row 6)
4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v20.8b},[x5], x3           //st (row 6)
4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v18.8b, v18.8h,#5           //round shift (row 7)
4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v18.8b},[x5], x3           //st (row 7)
4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_func                    //skip sz_4_proc and go to the common restore/return
4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
// sz_4_proc: straight-line path that writes four output rows of four bytes
// each (every st1 below stores one 32-bit lane and advances x2 by x3).
// It is entered only by branch (the instruction before it is an
// unconditional b end_func) and falls through into end_func.
// Register roles established inside this block:
//   v2 = 1, v3 = 2, v27 = 7, v28 = 32, v29 = 31   (byte constants)
//   v22 = v30 * v31 (v30 is set outside this view)
//   v6 = fract = product & 31, v7 = 32 - fract
//   v26 = least idx read from the idx_neg_idx_3_9 row selected by x5 - 3
//   v0 = 16 reference bytes loaded from x6, used as the tbl lookup table
//   v1 / v4 = tbl indices for ref_main_idx on even / odd rows
//   v19 / v5 = the same indices minus 1
// Each row is blended as (ref[idx] * (32 - fract) + ref[idx - 1] * fract),
// then rounded and shifted right by 5 with rshrn.
4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarsz_4_proc:
4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v31.8b},[x14]              //v31 = 8 bytes from [x14] (x14 is set outside this view)
4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v2.8b, #1                   //constant 1, subtracted below to get ref_main_idx - 1
4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v3.8b, #2                   //constant 2, index step from row r to row r + 2
4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    adrp        x12, :got:idx_neg_idx_3_9   //load least idx table
4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         x12, [x12, #:got_lo12:idx_neg_idx_3_9]      //x12 = address of idx_neg_idx_3_9 (through the GOT)
4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    smull       v22.8h, v30.8b, v31.8b      //(col+1)*intra_pred_angle [0:7](col)
4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x7, x5, #3                  //table row = x5 - 3 (x5 presumably holds the mode - confirm in the prologue)
4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x12, x12, x7, lsl #4        //rows of the table are addressed 16 bytes apart
4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    mov         x8, x12
4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldr         w9,  [x8]                   //first 32-bit word of the selected row = least idx
4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sxtw        x9,w9                       //sign-extend for the 64-bit pointer math below
4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v26.8b,w9                   //least idx broadcast to every lane
4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x6, x0, x4, lsl #1          //pu1_ref + 2nt
4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    xtn         v6.8b,  v22.8h              //low byte of each product, masked down to fract below
4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x6, x6, #9                  //ref_main_idx + 2nt - (8 + 1)(two_nt - idx - row ) for 8 & 8 - 1row
4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x6, x6, x9                  //subtract least idx (x9) to reach the start of the lookup window
4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v0.16b}, [x6]              //loads the 16 reference bytes used as the tbl table (from least idx)
4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v29.8b, #31                 //mask of 31 for the and that extracts fract
5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v28.8b, #32                 //32, minuend for 32 - fract
5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sshr        v22.8h, v22.8h,#5           //idx = product >> 5 (arithmetic shift)
5049cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sqxtn       v1.8b,  v22.8h              //saturating narrow of idx to 8 bits
5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    and         v6.8b,  v6.8b ,  v29.8b     //fract = product & 31, kept in v6
5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v7.8b,  v28.8b ,  v6.8b     //32-fract
5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    movi        v27.8b, #7                  //constant 7 added to the indices below (orig note: row 0 to 7(row-1))
5109cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v2.8b      //idx - 1 (acts as + 1 after the negation on the next line)
5119cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v26.8b ,  v1.8b     //ref_main_idx = least idx - (idx - 1)
5129cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    add         v1.8b,  v1.8b ,  v27.8b     //add 7 (orig note: to compensate the pu1_src idx incremented by 8)
5139cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v1.8b ,  v2.8b     //ref_main_idx - 1
5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5159cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v4.8b,  v1.8b ,  v2.8b      //row 1 ref_main_idx
5169cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v5.8b,  v19.8b ,  v2.8b     //row 1 ref_main_idx - 1
5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5189cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v12.8b, {v0.16b},v1.8b      //load from ref_main_idx (row 0)
5199cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v13.8b, {v0.16b},v19.8b     //load from ref_main_idx - 1 (row 0)
5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v24.8h, v12.8b, v7.8b       //mul (row 0): ref[idx] * (32 - fract)
5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v16.8b, {v0.16b},v4.8b      //load from ref_main_idx    (row 1)
5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v24.8h, v13.8b, v6.8b       //mla (row 0): + ref[idx - 1] * fract
5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5269cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v1.8b,  v1.8b ,  v3.8b      //idx (row 2)
5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v17.8b, {v0.16b},v5.8b      //load from ref_main_idx - 1 (row 1)
5289cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    sub         v19.8b,  v19.8b ,  v3.8b    //idx - 1 (row 2)
5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v22.8h, v16.8b, v7.8b       //mul (row 1)
5319cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v12.8b, {v0.16b},v1.8b      //load from ref_main_idx    (row 2)
5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v22.8h, v17.8b, v6.8b       //mla (row 1)
5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v24.8b, v24.8h,#5           //round shift (row 0)
5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v4.8b,  v4.8b ,  v3.8b      //idx (row 3)
5379cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    tbl         v13.8b, {v0.16b},v19.8b     //load from ref_main_idx - 1 (row 2)
5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         v5.8b,  v5.8b ,  v3.8b      //idx - 1 (row 3)
5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v20.8h, v12.8b, v7.8b       //mul (row 2)
5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v16.8b, {v0.16b},v4.8b      //load from ref_main_idx (row 3)
5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v20.8h, v13.8b, v6.8b       //mla (row 2)
5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v24.s}[0],[x2], x3         //st row 0 (one 32-bit lane = 4 bytes), x2 advances by x3
5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v22.8b, v22.8h,#5           //round shift (row 1)
5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    tbl         v17.8b, {v0.16b},v5.8b      //load from ref_main_idx - 1 (row 3)
5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umull       v18.8h, v16.8b, v7.8b       //mul (row 3)
5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    umlal       v18.8h, v17.8b, v6.8b       //mla (row 3)
5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.s}[0],[x2], x3         //st row 1
5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v20.8b, v20.8h,#5           //round shift (row 2)
5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v20.s}[0],[x2], x3         //st row 2
5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    rshrn       v18.8b, v18.8h,#5           //round shift (row 3)
5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v18.s}[0],[x2], x3         //st (row 3), last store before falling into end_func
5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
// end_func: common exit for both the 8/16/32 path (via b end_func) and the
// sz_4_proc path (fall-through). Pops three 16-byte pairs from the stack with
// post-indexed ldp and returns. x20 is used as scratch by this routine and
// v12-v15 as NEON temporaries, so they are restored here.
// NOTE(review): the matching stp sequence lives in the prologue, outside this
// view - confirm that the pop order below is the exact reverse of it.
5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_func:
5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    // ldmfd sp!,{x4-x12,x15}          //commented-out ldmfd form of the register reload, replaced by the ldp sequence below
5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldp         x19, x20,[sp],#16           //restore x19/x20, sp advances by 16
5649cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    ldp         d14,d15,[sp],#16            //restore low 64 bits of v14/v15
5659cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    ldp         d12,d13,[sp],#16            //restore low 64 bits of v12/v13
5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ret
5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
571