10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///*****************************************************************************
20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Licensed under the Apache License, Version 2.0 (the "License");
60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* you may not use this file except in compliance with the License.
70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* You may obtain a copy of the License at:
80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* http://www.apache.org/licenses/LICENSE-2.0
100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Unless required by applicable law or agreed to in writing, software
120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* distributed under the License is distributed on an "AS IS" BASIS,
130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* See the License for the specific language governing permissions and
150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* limitations under the License.
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*****************************************************************************/
180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///**
190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*******************************************************************************
200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @file
210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  ihevc_intra_pred_luma_horz_neon.s
220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @brief
240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  contains function definition for intra prediction  interpolation filters
250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @author
280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  parthiban v
290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @par list of functions:
310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  - ihevc_intra_pred_luma_horz()
320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @remarks
340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  none
350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*******************************************************************************
370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*/
380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//
390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///**
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*******************************************************************************
410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @brief
430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*     intra prediction interpolation filter for horizontal luma variable.
440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @par description:
460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*      horizontal intraprediction(mode 10) with.extern  samples location
470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*      pointed by 'pu1_ref' to the tu block  location pointed by 'pu1_dst'  refer
480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*      to section 8.4.4.2.6 in the standard (special case)
490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] pu1_src
510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  uword8 pointer to the source
520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[out] pu1_dst
540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  uword8 pointer to the destination
550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] src_strd
570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  integer source stride
580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] dst_strd
600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  integer destination stride
610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] nt
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  integer transform block size
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @param[in] mode
660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  integer intraprediction mode
670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @returns
690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @remarks
710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*  none
720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*
730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*******************************************************************************
740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*/
750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//void ihevc_intra_pred_luma_horz(uword8 *pu1_ref,
760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                                word32 src_strd,
770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                                uword8 *pu1_dst,
780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                                word32 dst_strd,
790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                                word32 nt,
800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//                                word32 mode)
810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//**************variables vs registers*****************************************
820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x0 => *pu1_ref
830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x1 =>  src_strd
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x2 => *pu1_dst
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x3 =>  dst_strd
860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text
880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.align 4
890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.include "ihevc_neon_macros.s"
900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_intra_pred_luma_horz_av8
940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.type ihevc_intra_pred_luma_horz_av8, %function
960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_intra_pred_luma_horz_av8:
980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    // stmfd sp!, {x4-x12, x14}                //stack stores the values of the arguments
1009cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy
1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    stp         x19, x20,[sp,#-16]!
1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    //ldr          x5,[sp,#44]                        @loads mode
1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    lsl         x6,x4,#1                    //two_nt
1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x12,x0,x6                   //*pu1_ref[two_nt]
1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         x4,#4                       //if nt == 4
1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         core_loop_4
1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         x4,#8                       //if nt == 8
1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         core_loop_8
1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cmp         x4,#16                      //if nt == 16
1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    beq         core_loop_16
1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x12,x12,#16                 //move to 16th value pointer
1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x9,x2,#16
1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_32:
1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         { v0.16b},[x12]             //load 16 values. d1[7] will have the 1st value.
1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
122d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v2.16b, v0.b[15]            //duplicate the i value.
1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
124d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v4.16b, v0.b[14]            //duplicate the ii value.
125d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v6.16b, v0.b[13]            //duplicate the iii value.
1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x2],x3           //store in 1st row 0-16 columns
1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x9],x3           //store in 1st row 16-32 columns
1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
129d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v1.16b, v0.b[12]
1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x2],x3
1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x9],x3
1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
133d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v2.16b, v0.b[11]
1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x2],x3
1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x9],x3
1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
137d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v4.16b, v0.b[10]
1389cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x2],x3
1399cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x9],x3
1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
141d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v6.16b, v0.b[9]
1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x2],x3
1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x9],x3
1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
145d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v1.16b, v0.b[8]
1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x2],x3
1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x9],x3
1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
149d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v2.16b, v0.b[7]
1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x2],x3
1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x9],x3
1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
153d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v4.16b, v0.b[6]
1549cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x2],x3
1559cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x9],x3
1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
157d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v6.16b, v0.b[5]
1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x2],x3
1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x9],x3
1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
161d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v1.16b, v0.b[4]
1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x2],x3
1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x9],x3
1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
165d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v2.16b, v0.b[3]
1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x2],x3
1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x9],x3
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
169d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v4.16b, v0.b[2]
1709cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x2],x3
1719cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x9],x3
1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
173d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v6.16b, v0.b[1]
1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x2],x3
1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x9],x3
1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x12,x12,#16                 //move to 16th value pointer
1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
178d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v1.16b, v0.b[0]
1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x2],x3
1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x9],x3
1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    subs        x4,x4,#16                   //decrement the loop count by 16
1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x2],x3
1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x9],x3
1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1869cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x2],x3
1879cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x9],x3
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    bgt         core_loop_32
1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    // ldmfd sp!,{x4-x12,x15}                  //reload the registers from sp
1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldp         x19, x20,[sp],#16
1919cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy
1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ret
1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_func
1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_16:
1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldrb        w14,[x12],#1                //pu1_ref[two_nt]
1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sxtw        x14,w14
1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         { v30.8b},[x12],#8          //pu1_ref[two_nt + 1 + col]
1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         { v31.8b},[x12]             //pu1_ref[two_nt + 1 + col]
2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x12,x12,#8
2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v28.8b,w14
2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x12,x12,#17
2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         { v0.16b},[x12]
205d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v26.8b, v0.b[15]
2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    uxtl        v26.8h, v26.8b
2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
208d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v2.16b, v0.b[14]
2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    usubl       v24.8h, v30.8b, v28.8b
2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
211d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v4.16b, v0.b[13]
2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sshr        v24.8h, v24.8h,#1
2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
214d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v6.16b, v0.b[12]
2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sqadd       v22.8h,  v26.8h ,  v24.8h
2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
217d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v1.16b, v0.b[11]
2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sqxtun      v22.8b, v22.8h
2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.8b},[x2],#8
2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
222d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v18.16b, v0.b[10]
2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    usubl       v24.8h, v31.8b, v28.8b
2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
225d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v19.16b, v0.b[9]
2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sshr        v24.8h, v24.8h,#1
2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
228d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v20.16b, v0.b[8]
2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sqadd       v22.8h,  v26.8h ,  v24.8h
2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
231d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v16.16b, v0.b[7]
2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sqxtun      v22.8b, v22.8h
2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.8b},[x2],x3
2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x2,x2,#8
2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x2],x3
2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x2],x3
2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x2],x3
2419cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x2],x3
2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
243d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v2.16b, v0.b[6]
2449cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v18.16b},[x2],x3
2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
246d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v4.16b, v0.b[5]
2479cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v19.16b},[x2],x3
2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
249d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v6.16b, v0.b[4]
2509cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v20.16b},[x2],x3
2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
252d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v1.16b, v0.b[3]
2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v16.16b},[x2],x3
2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
255d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v18.16b, v0.b[2]
2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v2.16b},[x2],x3
2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
258d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v19.16b, v0.b[1]
2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v4.16b},[x2],x3
2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
261d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v20.16b, v0.b[0]
2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         { v6.16b},[x2],x3
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2649cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v1.16b},[x2],x3
2659cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v18.16b},[x2],x3
2669cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v19.16b},[x2],x3
2679cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         { v20.16b},[x2],x3
2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    // ldmfd sp!,{x4-x12,x15}                  //reload the registers from sp
2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldp         x19, x20,[sp],#16
2719cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy
2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ret
2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_func
2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_8:
2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldrb        w14,[x12]                   //pu1_ref[two_nt]
2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sxtw        x14,w14
2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x12,x12,#1                  //pu1_ref[two_nt + 1]
2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v30.8b},[x12]              //pu1_ref[two_nt + 1 + col]
2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x12,x12,#9
2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v0.8b},[x12]
284d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v26.8b, v0.b[7]
2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v28.8b,w14
2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
287d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v3.8b, v0.b[6]
2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    uxtl        v26.8h, v26.8b
2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
290d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v4.8b, v0.b[5]
2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    usubl       v24.8h, v30.8b, v28.8b
2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
293d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v5.8b, v0.b[4]
2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sshr        v24.8h, v24.8h,#1
2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
296d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v6.8b, v0.b[3]
2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sqadd       v22.8h,  v26.8h ,  v24.8h
2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
299d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v7.8b, v0.b[2]
3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sqxtun      v22.8b, v22.8h
3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.8b},[x2],x3
3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v3.8b},[x2],x3
3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
305d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v1.8b, v0.b[1]
3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v4.8b},[x2],x3
3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v5.8b},[x2],x3
3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
309d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v17.8b, v0.b[0]
3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v6.8b},[x2],x3
3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v7.8b},[x2],x3
3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3139cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         {v1.8b},[x2],x3
3149cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy    st1         {v17.8b},[x2],x3
3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    // ldmfd sp!,{x4-x12,x15}                  //reload the registers from sp
3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldp         x19, x20,[sp],#16
3179cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy
3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ret
3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    b           end_func
3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarcore_loop_4:
3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldrb        w14,[x12]                   //pu1_ref[two_nt]
3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sxtw        x14,w14
3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         x12,x12,#1                  //pu1_ref[two_nt + 1]
3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v30.8b},[x12]              //pu1_ref[two_nt + 1 + col]
3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sub         x12,x12,#5
3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ld1         {v0.8b},[x12]
3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    dup         v28.8b,w14
331d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v26.8b, v0.b[3]
3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    uxtl        v26.8h, v26.8b
3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
334d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v3.8b, v0.b[2]
3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    usubl       v24.8h, v30.8b, v28.8b
3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
337d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v4.8b, v0.b[1]
3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sshr        v24.8h, v24.8h,#1
3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
340d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer    dup         v5.8b, v0.b[0]
3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sqadd       v22.8h,  v26.8h ,  v24.8h
3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    sqxtun      v22.8b, v22.8h
3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v22.s}[0],[x2],x3
3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v3.s}[0],[x2],x3
3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v4.s}[0],[x2],x3
3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    st1         {v5.s}[0],[x2],x3
3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    // ldmfd sp!,{x4-x12,x15}                  //reload the registers from sp
3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ldp         x19, x20,[sp],#16
3529cbd70a2930875be59d7df68136ac9a1a949a13dNaveen Kumar Ponnusamy
3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ret
3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarend_func:
3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
358