///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
//*******************************************************************************
//* @file
//*  ihevc_intra_pred_luma_horz_neon.s
//*
//* @brief
//*  contains function definition for intra prediction interpolation filters
//*
//*
//* @author
//*  parthiban v
//*
//* @par list of functions:
//*  - ihevc_intra_pred_luma_horz()
//*
//* @remarks
//*  none
//*
//*******************************************************************************
//*/
//
///**
//*******************************************************************************
//*
//* @brief
//*  intra prediction interpolation filter for horizontal luma variable.
//*
//* @par description:
//*  horizontal intraprediction(mode 10) with reference samples location
//*  pointed by 'pu1_ref' to the tu block location pointed by 'pu1_dst' refer
//*  to section 8.4.4.2.6 in the standard (special case)
//*
//* @param[in] pu1_ref
//*  uword8 pointer to the reference samples
//*
//* @param[out] pu1_dst
//*  uword8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] nt
//*  integer transform block size
//*
//* @param[in] mode
//*  integer intraprediction mode
//*
//* @returns
//*  none
//*
//* @remarks
//*  none
//*
//*******************************************************************************
//*/
//void ihevc_intra_pred_luma_horz(uword8 *pu1_ref,
//                                word32 src_strd,
//                                uword8 *pu1_dst,
//                                word32 dst_strd,
//                                word32 nt,
//                                word32 mode)
//**************variables vs registers*****************************************
//x0 => *pu1_ref
//x1 =>  src_strd
//x2 => *pu1_dst
//x3 =>  dst_strd
//x4 =>  nt
//x5 =>  mode

.text
.align 4
.include "ihevc_neon_macros.s"



.globl ihevc_intra_pred_luma_horz_av8

.type ihevc_intra_pred_luma_horz_av8, %function

//------------------------------------------------------------------------------
// void ihevc_intra_pred_luma_horz_av8(uword8 *pu1_ref, word32 src_strd,
//                                     uword8 *pu1_dst, word32 dst_strd,
//                                     word32 nt,       word32 mode)
//
// Syntax : GNU as, AArch64 (A64 + Advanced SIMD)
// ABI    : AAPCS64, leaf function, no stack usage
//
// In     : x0 = pu1_ref   reference sample array
//          x1 = src_strd  (not read by this routine)
//          x2 = pu1_dst   destination block
//          w3 = dst_strd  destination stride in bytes (32-bit int)
//          w4 = nt        block size (32-bit int): 4, 8 and 16 have dedicated
//                         paths, every other value takes the 32-wide path
//          w5 = mode      (not read by this routine)
//
// Effect : with two_nt = 2 * nt, output row r is filled with the single
//          value pu1_ref[two_nt - 1 - r] replicated across the row.
//          For nt = 4, 8 and 16 the first row (r = 0) is instead
//              sat_u8(pu1_ref[two_nt - 1]
//                     + ((pu1_ref[two_nt + 1 + col] - pu1_ref[two_nt]) >> 1))
//          The 32-wide path applies no such adjustment to its first row.
//
// Clobb  : x2, x3, x4, x6, x9, x12, x14, v0-v7, v16-v20, v22, v24, v26, v28,
//          v30, v31, flags. No callee-saved register is touched, so nothing
//          is spilled.
//------------------------------------------------------------------------------
ihevc_intra_pred_luma_horz_av8:

    // nt and dst_strd are 32-bit ints. AAPCS64 leaves bits [63:32] of their
    // registers unspecified, so widen them before any 64-bit use (address
    // arithmetic, 64-bit compares, post-index register stride).
    sxtw        x4, w4                      // nt
    sxtw        x3, w3                      // dst_strd

    lsl         x6, x4, #1                  // two_nt
    add         x12, x0, x6                 // x12 = &pu1_ref[two_nt]

    cmp         x4, #4                      // nt == 4 ?
    beq         core_loop_4

    cmp         x4, #8                      // nt == 8 ?
    beq         core_loop_8

    cmp         x4, #16                     // nt == 16 ?
    beq         core_loop_16

    // Fall through: 32-wide path.
    sub         x12, x12, #16               // x12 = &pu1_ref[two_nt - 16]
    add         x9, x2, #16                 // x9 walks columns 16..31 of each row

// Each pass emits 16 rows of 32 bytes. x4 counts the rows still to write and
// x12 steps back 16 bytes per pass. dup of the next row value is interleaved
// with the stores of the previous one.
core_loop_32:
    ld1         {v0.16b}, [x12]             // lane 15 is the value for the first row of this pass

    dup         v2.16b, v0.b[15]            // row 0 value
    dup         v4.16b, v0.b[14]            // row 1 value
    dup         v6.16b, v0.b[13]            // row 2 value
    st1         {v2.16b}, [x2], x3          // row 0, columns 0..15
    st1         {v2.16b}, [x9], x3          // row 0, columns 16..31

    dup         v1.16b, v0.b[12]
    st1         {v4.16b}, [x2], x3
    st1         {v4.16b}, [x9], x3

    dup         v2.16b, v0.b[11]
    st1         {v6.16b}, [x2], x3
    st1         {v6.16b}, [x9], x3

    dup         v4.16b, v0.b[10]
    st1         {v1.16b}, [x2], x3
    st1         {v1.16b}, [x9], x3

    dup         v6.16b, v0.b[9]
    st1         {v2.16b}, [x2], x3
    st1         {v2.16b}, [x9], x3

    dup         v1.16b, v0.b[8]
    st1         {v4.16b}, [x2], x3
    st1         {v4.16b}, [x9], x3

    dup         v2.16b, v0.b[7]
    st1         {v6.16b}, [x2], x3
    st1         {v6.16b}, [x9], x3

    dup         v4.16b, v0.b[6]
    st1         {v1.16b}, [x2], x3
    st1         {v1.16b}, [x9], x3

    dup         v6.16b, v0.b[5]
    st1         {v2.16b}, [x2], x3
    st1         {v2.16b}, [x9], x3

    dup         v1.16b, v0.b[4]
    st1         {v4.16b}, [x2], x3
    st1         {v4.16b}, [x9], x3

    dup         v2.16b, v0.b[3]
    st1         {v6.16b}, [x2], x3
    st1         {v6.16b}, [x9], x3

    dup         v4.16b, v0.b[2]
    st1         {v1.16b}, [x2], x3
    st1         {v1.16b}, [x9], x3

    dup         v6.16b, v0.b[1]
    st1         {v2.16b}, [x2], x3
    st1         {v2.16b}, [x9], x3
    sub         x12, x12, #16               // previous 16 reference bytes for the next pass

    dup         v1.16b, v0.b[0]
    st1         {v4.16b}, [x2], x3
    st1         {v4.16b}, [x9], x3

    subs        x4, x4, #16                 // 16 rows done
    st1         {v6.16b}, [x2], x3
    st1         {v6.16b}, [x9], x3

    st1         {v1.16b}, [x2], x3
    st1         {v1.16b}, [x9], x3
    bgt         core_loop_32

    ret

// nt == 16: first row is filtered in two 8-column halves, remaining 15 rows
// are plain replication.
core_loop_16:
    ldrb        w14, [x12], #1              // w14 = pu1_ref[two_nt]; x12 -> pu1_ref[two_nt + 1]
    ld1         {v30.8b}, [x12], #8         // pu1_ref[two_nt + 1 + col], col = 0..7
    ld1         {v31.8b}, [x12]             // pu1_ref[two_nt + 1 + col], col = 8..15

    dup         v28.8b, w14                 // broadcast pu1_ref[two_nt]
    sub         x12, x12, #25               // x12 = &pu1_ref[two_nt - 16] (undo +9, step back 16)
    ld1         {v0.16b}, [x12]             // lane 15 = pu1_ref[two_nt - 1]
    dup         v26.8b, v0.b[15]
    uxtl        v26.8h, v26.8b              // pu1_ref[two_nt - 1] widened to 16 bit

    dup         v2.16b, v0.b[14]
    usubl       v24.8h, v30.8b, v28.8b      // difference modulo 2^16, treated as signed below

    dup         v4.16b, v0.b[13]
    sshr        v24.8h, v24.8h, #1          // arithmetic >> 1

    dup         v6.16b, v0.b[12]
    sqadd       v22.8h, v26.8h, v24.8h

    dup         v1.16b, v0.b[11]
    sqxtun      v22.8b, v22.8h              // saturate to unsigned 8 bit

    st1         {v22.8b}, [x2], #8          // row 0, columns 0..7

    dup         v18.16b, v0.b[10]
    usubl       v24.8h, v31.8b, v28.8b

    dup         v19.16b, v0.b[9]
    sshr        v24.8h, v24.8h, #1

    dup         v20.16b, v0.b[8]
    sqadd       v22.8h, v26.8h, v24.8h

    dup         v16.16b, v0.b[7]
    sqxtun      v22.8b, v22.8h

    st1         {v22.8b}, [x2], x3          // row 0, columns 8..15
    sub         x2, x2, #8                  // back to column 0 of row 1

    st1         {v2.16b}, [x2], x3          // row 1

    st1         {v4.16b}, [x2], x3
    st1         {v6.16b}, [x2], x3
    st1         {v1.16b}, [x2], x3

    dup         v2.16b, v0.b[6]
    st1         {v18.16b}, [x2], x3

    dup         v4.16b, v0.b[5]
    st1         {v19.16b}, [x2], x3

    dup         v6.16b, v0.b[4]
    st1         {v20.16b}, [x2], x3

    dup         v1.16b, v0.b[3]
    st1         {v16.16b}, [x2], x3

    dup         v18.16b, v0.b[2]
    st1         {v2.16b}, [x2], x3

    dup         v19.16b, v0.b[1]
    st1         {v4.16b}, [x2], x3

    dup         v20.16b, v0.b[0]
    st1         {v6.16b}, [x2], x3

    st1         {v1.16b}, [x2], x3
    st1         {v18.16b}, [x2], x3
    st1         {v19.16b}, [x2], x3
    st1         {v20.16b}, [x2], x3         // row 15

    ret


// nt == 8: filtered first row, then 7 replicated rows of 8 bytes.
core_loop_8:
    ldrb        w14, [x12]                  // w14 = pu1_ref[two_nt]
    add         x12, x12, #1                // x12 -> pu1_ref[two_nt + 1]
    ld1         {v30.8b}, [x12]             // pu1_ref[two_nt + 1 + col], col = 0..7

    sub         x12, x12, #9                // x12 = &pu1_ref[two_nt - 8]
    ld1         {v0.8b}, [x12]              // lane 7 = pu1_ref[two_nt - 1]
    dup         v26.8b, v0.b[7]
    dup         v28.8b, w14                 // broadcast pu1_ref[two_nt]

    dup         v3.8b, v0.b[6]
    uxtl        v26.8h, v26.8b

    dup         v4.8b, v0.b[5]
    usubl       v24.8h, v30.8b, v28.8b      // difference modulo 2^16, treated as signed below

    dup         v5.8b, v0.b[4]
    sshr        v24.8h, v24.8h, #1

    dup         v6.8b, v0.b[3]
    sqadd       v22.8h, v26.8h, v24.8h

    dup         v7.8b, v0.b[2]
    sqxtun      v22.8b, v22.8h              // saturate to unsigned 8 bit

    st1         {v22.8b}, [x2], x3          // row 0 (filtered)
    st1         {v3.8b}, [x2], x3           // row 1

    dup         v1.8b, v0.b[1]
    st1         {v4.8b}, [x2], x3
    st1         {v5.8b}, [x2], x3

    dup         v17.8b, v0.b[0]
    st1         {v6.8b}, [x2], x3
    st1         {v7.8b}, [x2], x3

    st1         {v1.8b}, [x2], x3
    st1         {v17.8b}, [x2], x3          // row 7

    ret


// nt == 4: same scheme as nt == 8, but only the low 32 bits (4 pixels) of each
// vector are stored per row. The 8-byte loads read more than is consumed.
core_loop_4:
    ldrb        w14, [x12]                  // w14 = pu1_ref[two_nt]
    add         x12, x12, #1                // x12 -> pu1_ref[two_nt + 1]
    ld1         {v30.8b}, [x12]             // pu1_ref[two_nt + 1 + col]

    sub         x12, x12, #5                // x12 = &pu1_ref[two_nt - 4]
    ld1         {v0.8b}, [x12]              // lane 3 = pu1_ref[two_nt - 1]
    dup         v28.8b, w14                 // broadcast pu1_ref[two_nt]
    dup         v26.8b, v0.b[3]
    uxtl        v26.8h, v26.8b

    dup         v3.8b, v0.b[2]
    usubl       v24.8h, v30.8b, v28.8b      // difference modulo 2^16, treated as signed below

    dup         v4.8b, v0.b[1]
    sshr        v24.8h, v24.8h, #1

    dup         v5.8b, v0.b[0]
    sqadd       v22.8h, v26.8h, v24.8h

    sqxtun      v22.8b, v22.8h              // saturate to unsigned 8 bit

    st1         {v22.s}[0], [x2], x3        // row 0 (filtered)
    st1         {v3.s}[0], [x2], x3         // row 1
    st1         {v4.s}[0], [x2], x3         // row 2
    st1         {v5.s}[0], [x2], x3         // row 3

    ret
end_func:                                   // unreferenced here; label kept so the symbol still exists


