10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///***************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* limitations under the License. 
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*****************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar///** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//******************************************************************************* 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* ihevc_sao_band_offset_chroma.s 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Contains function definitions for SAO band offset of chroma. 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Functions are coded using NEON assembly and can be compiled using ARM 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* RVCT 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @author 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* Parthiban V 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @par List of Functions: 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* - ihevc_sao_band_offset_chroma_av8() 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* @remarks 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* None 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//* 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//******************************************************************************* 380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//*/ 
390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//void ihevc_sao_band_offset_chroma(UWORD8 *pu1_src, 400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// WORD32 src_strd, 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// UWORD8 *pu1_src_left, 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// UWORD8 *pu1_src_top, 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// UWORD8 *pu1_src_top_left, 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// WORD32 sao_band_pos_u, 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// WORD32 sao_band_pos_v, 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// WORD8 *pi1_sao_offset_u, 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// WORD8 *pi1_sao_offset_v, 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// WORD32 wd, 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// WORD32 ht) 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//**************Variables Vs Registers***************************************** 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x0 => *pu1_src 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x1 => src_strd 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x2 => *pu1_src_left 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x3 => *pu1_src_top 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x4 => *pu1_src_top_left 40 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x5 => sao_band_pos_u 44 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x6 => sao_band_pos_v 48 590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x7 => *pi1_sao_offset_u 52 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x8 => *pi1_sao_offset_v 56 610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x9 => wd 60 
620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//x10=> ht 64 630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text 650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.p2align 2 660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.include "ihevc_neon_macros.s" 670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl gu1_table_band_idx 690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_sao_band_offset_chroma_av8 700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_sao_band_offset_chroma_av8: 720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x8,#0 730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x9,#0 740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x10,#0 750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr x8,[sp,#0] 770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr w9,[sp,#8] 780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldr w10,[sp,#16] 790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar push_v_regs 810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar // stmfd sp!, {x4-x12, x14} //stack stores the values of the arguments 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar stp x19, x20,[sp,#-16]! 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar stp x21, x22,[sp,#-16]! 840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar stp x23, x24,[sp,#-16]! 
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x15,x4 // pu1_src_top_left 40 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x16,x5 // sao_band_pos_u 44 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x17,x6 // sao_band_pos_v 48 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x19,x7 // pi1_sao_offset_u 52 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x20,x8 // pi1_sao_offset_v 56 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x21,x9 // wd 60 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov x22,x10 // ht 64 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x4, x15 //Loads pu1_src_top_left 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x10, x22 //Loads ht 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x9, x21 //Loads wd 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x11,x10 //Move the ht to x9 for loop counter 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x12,x0,x9 //pu1_src[row * src_strd + (wd)] 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADRP x14, :got:gu1_table_band_idx 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR x14, [x14, #:got_lo12:gu1_table_band_idx] 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB x12,x12,#2 //wd-2 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP: 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH w5,[x12] //Load the value 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar ADD x12,x12,x1 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS x11,x11,#1 //Decrement the loop counter 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH w5,[x2],#2 //Store the value in pu1_src_left pointer 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x5, x16 //Loads sao_band_pos_u 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v1.8b},[x14],#8 //band_table_u.val[0] 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x12,x3,x9 //pu1_src_top[wd] 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sub x23,x12,#2 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH w11,[x23] 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v2.8b},[x14],#8 //band_table_u.val[1] 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL x6,x5,#3 //sao_band_pos_u 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH w11,[x4] //store to pu1_src_top_left[0] 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v3.8b},[x14],#8 //band_table_u.val[2] 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x7, x19 //Loads pi1_sao_offset_u 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB x4,x10,#1 //ht-1 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v31.8b,w6 //band_pos_u 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mul x4, x4, x1 //ht-1 * src_strd 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x4,x4,x0 //pu1_src[(ht - 1) * src_strd] 
1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v4.8b},[x14],#8 //band_table_u.val[3] 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x11,x9 //Move the wd to x9 for loop counter 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_TOP_LOOP: //wd is always multiple of 8 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v0.8b},[x4],#8 //Load pu1_src[(ht - 1) * src_strd + col] 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS x11,x11,#8 //Decrement the loop counter by 8 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST1 {v0.8b},[x3],#8 //Store to pu1_src_top[col] 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_TOP_LOOP 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v30.8b},[x7] //pi1_sao_offset_u load 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v5.8b, v1.8b , v31.8b //band_table_u.val[0] = vadd_u8(band_table_u.val[0], sao_band_pos_u) 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 143d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer dup v29.8b, v30.b[1] //vdup_n_u8(pi1_sao_offset_u[1]) 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v6.8b, v2.8b , v31.8b //band_table_u.val[1] = vadd_u8(band_table_u.val[1], sao_band_pos_u) 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 146d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer dup v28.8b, v30.b[2] //vdup_n_u8(pi1_sao_offset_u[2]) 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v7.8b, v3.8b , v31.8b //band_table_u.val[2] = vadd_u8(band_table_u.val[2], sao_band_pos_u) 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 149d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer dup v27.8b, v30.b[3] //vdup_n_u8(pi1_sao_offset_u[3]) 
1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v8.8b, v4.8b , v31.8b //band_table_u.val[3] = vadd_u8(band_table_u.val[3], sao_band_pos_u) 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x5,#28 153d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer dup v26.8b, v30.b[4] //vdup_n_u8(pi1_sao_offset_u[4]) 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADRP x14, :got:gu1_table_band_idx 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR x14, [x14, #:got_lo12:gu1_table_band_idx] 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar movi v30.8b, #16 //vdup_n_u8(16) 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v1.8b, v5.8b , v29.8b //band_table_u.val[0] = vadd_u8(band_table_u.val[0], vdup_n_u8(pi1_sao_offset_u[1])) 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v9.8b},[x14],#8 //band_table_v.val[0] 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v2.8b, v6.8b , v28.8b //band_table_u.val[1] = vadd_u8(band_table_u.val[1], vdup_n_u8(pi1_sao_offset_u[2])) 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v10.8b},[x14],#8 //band_table_v.val[1] 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v3.8b, v7.8b , v27.8b //band_table_u.val[2] = vadd_u8(band_table_u.val[2], vdup_n_u8(pi1_sao_offset_u[3])) 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x6, x17 //Loads sao_band_pos_v 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v4.8b, v8.8b , v26.8b //band_table_u.val[3] = vadd_u8(band_table_u.val[3], vdup_n_u8(pi1_sao_offset_u[4])) 
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL x11,x6,#3 //sao_band_pos_v 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT SAO_BAND_POS_U_0 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_U_28: //case 28 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v13.8b, v30.8b , v4.8b //vcle_u8(band_table.val[3], vdup_n_u8(16)) 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SAO_BAND_POS_U_29 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ORR v4.8b, v4.8b , v13.8b //band_table.val[3] = vorr_u8(band_table.val[3], au1_cmp) 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar B SWITCH_BREAK_U 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_U_29: //case 29 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x5,#29 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v14.8b, v30.8b , v3.8b //vcle_u8(band_table.val[2], vdup_n_u8(16)) 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SAO_BAND_POS_U_30 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ORR v3.8b, v3.8b , v14.8b //band_table.val[2] = vorr_u8(band_table.val[2], au1_cmp) 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND v4.8b, v4.8b , v13.8b //band_table.val[3] = vand_u8(band_table.val[3], au1_cmp) 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar B SWITCH_BREAK_U 1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_U_30: //case 30 
1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x5,#30 1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v15.8b, v30.8b , v2.8b //vcle_u8(band_table.val[1], vdup_n_u8(16)) 1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SAO_BAND_POS_U_31 1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ORR v2.8b, v2.8b , v15.8b //band_table.val[1] = vorr_u8(band_table.val[1], au1_cmp) 1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND v3.8b, v3.8b , v14.8b //band_table.val[2] = vand_u8(band_table.val[2], au1_cmp) 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_U_31: //case 31 1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x5,#31 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SWITCH_BREAK_U 2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v16.8b, v30.8b , v1.8b //vcle_u8(band_table.val[0], vdup_n_u8(16)) 2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ORR v1.8b, v1.8b , v16.8b //band_table.val[0] = vorr_u8(band_table.val[0], au1_cmp) 2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND v2.8b, v2.8b , v15.8b //band_table.val[1] = vand_u8(band_table.val[1], au1_cmp) 2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar B SWITCH_BREAK_U 2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_U_0: 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x5,#0 //case 0 2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SWITCH_BREAK_U 2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v16.8b, v30.8b , v1.8b //vcle_u8(band_table.val[0], vdup_n_u8(16)) 2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND v1.8b, v1.8b , v16.8b //band_table.val[0] = vand_u8(band_table.val[0], au1_cmp) 2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSWITCH_BREAK_U: 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dup v30.8b,w11 //band_pos_v 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x8, x20 //Loads pi1_sao_offset_v 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v11.8b},[x14],#8 //band_table_v.val[2] 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v13.8b, v9.8b , v30.8b //band_table_v.val[0] = vadd_u8(band_table_v.val[0], band_pos_v) 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v12.8b},[x14],#8 //band_table_v.val[3] 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v14.8b, v10.8b , v30.8b //band_table_v.val[1] = vadd_u8(band_table_v.val[1], band_pos_v) 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD1 {v25.8b},[x8] //pi1_sao_offset_v load 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v15.8b, v11.8b , v30.8b //band_table_v.val[2] = vadd_u8(band_table_v.val[2], band_pos_v) 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 228d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer dup v29.8b, v25.b[1] //vdup_n_u8(pi1_sao_offset_v[1]) 2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v16.8b, v12.8b , v30.8b //band_table_v.val[3] = vadd_u8(band_table_v.val[3], band_pos_v) 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
231d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer dup v28.8b, v25.b[2] //vdup_n_u8(pi1_sao_offset_v[2]) 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v9.8b, v13.8b , v29.8b //band_table_v.val[0] = vadd_u8(band_table_v.val[0], vdup_n_u8(pi1_sao_offset_v[1])) 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 234d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer dup v27.8b, v25.b[3] //vdup_n_u8(pi1_sao_offset_v[3]) 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v10.8b, v14.8b , v28.8b //band_table_v.val[1] = vadd_u8(band_table_v.val[1], vdup_n_u8(pi1_sao_offset_v[2])) 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 237d91eedb8cdcdd3d4f23379517752d48fa5791604Bernhard Rosenkränzer dup v26.8b, v25.b[4] //vdup_n_u8(pi1_sao_offset_v[4]) 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v11.8b, v15.8b , v27.8b //band_table_v.val[2] = vadd_u8(band_table_v.val[2], vdup_n_u8(pi1_sao_offset_v[3])) 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar movi v29.8b, #16 //vdup_n_u8(16) 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD v12.8b, v16.8b , v26.8b //band_table_v.val[3] = vadd_u8(band_table_v.val[3], vdup_n_u8(pi1_sao_offset_v[4])) 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND x12,x9,#0xf 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x6,#28 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT SAO_BAND_POS_V_0 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_V_28: //case 28 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v17.8b, v29.8b , v12.8b //vcle_u8(band_table.val[3], vdup_n_u8(16)) 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE 
SAO_BAND_POS_V_29 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ORR v12.8b, v12.8b , v17.8b //band_table.val[3] = vorr_u8(band_table.val[3], au1_cmp) 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar B SWITCH_BREAK_V 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_V_29: //case 29 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x6,#29 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v18.8b, v29.8b , v11.8b //vcle_u8(band_table.val[2], vdup_n_u8(16)) 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SAO_BAND_POS_V_30 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ORR v11.8b, v11.8b , v18.8b //band_table.val[2] = vorr_u8(band_table.val[2], au1_cmp) 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND v12.8b, v12.8b , v17.8b //band_table.val[3] = vand_u8(band_table.val[3], au1_cmp) 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar B SWITCH_BREAK_V 2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_V_30: //case 30 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x6,#30 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v19.8b, v29.8b , v10.8b //vcle_u8(band_table.val[1], vdup_n_u8(16)) 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SAO_BAND_POS_V_31 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ORR v10.8b, v10.8b , v19.8b //band_table.val[1] = vorr_u8(band_table.val[1], au1_cmp) 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND v11.8b, v11.8b , v18.8b 
//band_table.val[2] = vand_u8(band_table.val[2], au1_cmp) 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar B SWITCH_BREAK_V 2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_V_31: //case 31 2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x6,#31 2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SWITCH_BREAK_V 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v20.8b, v29.8b , v9.8b //vcle_u8(band_table.val[0], vdup_n_u8(16)) 2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ORR v9.8b, v9.8b , v20.8b //band_table.val[0] = vorr_u8(band_table.val[0], au1_cmp) 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND v10.8b, v10.8b , v19.8b //band_table.val[1] = vand_u8(band_table.val[1], au1_cmp) 2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar B SWITCH_BREAK_V 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSAO_BAND_POS_V_0: 2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x6,#0 //case 0 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SWITCH_BREAK_V 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cmhs v20.8b, v29.8b , v9.8b //vcle_u8(band_table.val[0], vdup_n_u8(16)) 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar AND v9.8b, v9.8b , v20.8b //band_table.val[0] = vand_u8(band_table.val[0], au1_cmp) 2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSWITCH_BREAK_V: 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x9,#16 2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV 
x4,x0 //pu1_src_cpy 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v1.d[1],v2.d[0] 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v2.d[0],v3.d[0] 2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v2.d[1],v4.d[0] 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v9.d[1],v10.d[0] 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v10.d[0],v11.d[0] 2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v10.d[1],v12.d[0] 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT WIDTH_RESIDUE 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_LOOP: //Width is assigned to be multiple of 16 3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x4,x0 //pu1_src_cpy 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x11,x10 //move ht 3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x5,x4,x1 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarHEIGHT_LOOP: //unrolled for 4 rows 3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x6,x5,x1 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD2 {v5.8b, v6.8b},[x4] //vld1q_u8(pu1_src_cpy) 3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x7,x6,x1 3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD2 {v13.8b, v14.8b},[x5] //vld1q_u8(pu1_src_cpy) 3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v7.8b, v5.8b , v31.8b //vsub_u8(au1_cur_row_deint.val[0], band_pos_u) 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD2 {v17.8b, v18.8b},[x6] //vld1q_u8(pu1_src_cpy) 
3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v8.8b, v6.8b , v30.8b //vsub_u8(au1_cur_row_deint.val[1], band_pos_v) 3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD2 {v21.8b, v22.8b},[x7] //vld1q_u8(pu1_src_cpy) 3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v15.8b, v13.8b , v31.8b //vsub_u8(au1_cur_row_deint.val[0], band_pos_u) 3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v5.8b, {v1.16b- v2.16b},v7.8b //vtbx4_u8(au1_cur_row_deint.val[0], band_table_u, vsub_u8(au1_cur_row_deint.val[0], band_pos_u)) 3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v16.8b, v14.8b , v30.8b //vsub_u8(au1_cur_row_deint.val[1], band_pos_v) 3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v6.8b, {v9.16b- v10.16b},v8.8b //vtbx4_u8(au1_cur_row_deint.val[1], band_table_v, vsub_u8(au1_cur_row_deint.val[1], band_pos_v)) 3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v19.8b, v17.8b , v31.8b //vsub_u8(au1_cur_row_deint.val[0], band_pos_u) 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v13.8b, {v1.16b- v2.16b},v15.8b //vtbx4_u8(au1_cur_row_deint.val[0], band_table_u, vsub_u8(au1_cur_row_deint.val[0], band_pos_u)) 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v20.8b, v18.8b , v30.8b //vsub_u8(au1_cur_row_deint.val[1], band_pos_v) 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v14.8b, {v9.16b- v10.16b},v16.8b //vtbx4_u8(au1_cur_row_deint.val[1], band_table_v, vsub_u8(au1_cur_row_deint.val[1], band_pos_v)) 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v23.8b, v21.8b , v31.8b 
//vsub_u8(au1_cur_row_deint.val[0], band_pos_u) 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST2 {v5.8b, v6.8b},[x4] //vst1q_u8(pu1_src_cpy, au1_cur_row) 3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v24.8b, v22.8b , v30.8b //vsub_u8(au1_cur_row_deint.val[1], band_pos_v) 3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS x11,x11,#4 //Decrement the ht loop count by 4 3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v17.8b, {v1.16b- v2.16b},v19.8b //vtbx4_u8(au1_cur_row_deint.val[0], band_table_u, vsub_u8(au1_cur_row_deint.val[0], band_pos_u)) 3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST2 {v13.8b, v14.8b},[x5] //vst1q_u8(pu1_src_cpy, au1_cur_row) 3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v18.8b, {v9.16b- v10.16b},v20.8b //vtbx4_u8(au1_cur_row_deint.val[1], band_table_v, vsub_u8(au1_cur_row_deint.val[1], band_pos_v)) 3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v21.8b, {v1.16b- v2.16b},v23.8b //vtbx4_u8(au1_cur_row_deint.val[0], band_table_u, vsub_u8(au1_cur_row_deint.val[0], band_pos_u)) 3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v22.8b, {v9.16b- v10.16b},v24.8b //vtbx4_u8(au1_cur_row_deint.val[1], band_table_v, vsub_u8(au1_cur_row_deint.val[1], band_pos_v)) 3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST2 {v17.8b, v18.8b},[x6],x1 //vst1q_u8(pu1_src_cpy, au1_cur_row) 3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x4,x6,x1 3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST2 {v21.8b, v22.8b},[x7] 
//vst1q_u8(pu1_src_cpy, au1_cur_row) 3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x5,x4,x1 3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE HEIGHT_LOOP 3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB x9,x9,#16 //Decrement the width loop by 16 3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x0,x0,#16 3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP x9,#8 3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT WIDTH_LOOP 3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT END_LOOP 3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV x4,x0 //pu1_src_cpy 3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_RESIDUE: //If width is not multiple of 16 3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x5,x4,x1 3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD2 {v5.8b, v6.8b},[x4] //vld1q_u8(pu1_src_cpy) 3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x6,x5,x1 3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x7,x6,x1 3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD2 {v13.8b, v14.8b},[x5] //vld1q_u8(pu1_src_cpy) 3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v7.8b, v5.8b , v31.8b //vsub_u8(au1_cur_row_deint.val[0], band_pos_u) 3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD2 {v17.8b, v18.8b},[x6] //vld1q_u8(pu1_src_cpy) 3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v8.8b, v6.8b , v30.8b //vsub_u8(au1_cur_row_deint.val[1], band_pos_v) 
3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v5.8b, {v1.16b- v2.16b},v7.8b //vtbx4_u8(au1_cur_row_deint.val[0], band_table_u, vsub_u8(au1_cur_row_deint.val[0], band_pos_u)) 3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v15.8b, v13.8b , v31.8b //vsub_u8(au1_cur_row_deint.val[0], band_pos_u) 3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v6.8b, {v9.16b- v10.16b},v8.8b //vtbx4_u8(au1_cur_row_deint.val[1], band_table_v, vsub_u8(au1_cur_row_deint.val[1], band_pos_v)) 3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v16.8b, v14.8b , v30.8b //vsub_u8(au1_cur_row_deint.val[1], band_pos_v) 3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LD2 {v21.8b, v22.8b},[x7] //vld1q_u8(pu1_src_cpy) 3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v19.8b, v17.8b , v31.8b //vsub_u8(au1_cur_row_deint.val[0], band_pos_u) 3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v13.8b, {v1.16b- v2.16b},v15.8b //vtbx4_u8(au1_cur_row_deint.val[0], band_table_u, vsub_u8(au1_cur_row_deint.val[0], band_pos_u)) 3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v20.8b, v18.8b , v30.8b //vsub_u8(au1_cur_row_deint.val[1], band_pos_v) 3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v14.8b, {v9.16b- v10.16b},v16.8b //vtbx4_u8(au1_cur_row_deint.val[1], band_table_v, vsub_u8(au1_cur_row_deint.val[1], band_pos_v)) 3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ZIP1 v28.8b, v5.8b, v6.8b 3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ZIP2 v6.8b, v5.8b, v6.8b 3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov 
v5.8b, v28.8b 3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v17.8b, {v1.16b- v2.16b},v19.8b //vtbx4_u8(au1_cur_row_deint.val[0], band_table_u, vsub_u8(au1_cur_row_deint.val[0], band_pos_u)) 3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v23.8b, v21.8b , v31.8b //vsub_u8(au1_cur_row_deint.val[0], band_pos_u) 3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST1 {v5.8b},[x4] //vst1q_u8(pu1_src_cpy, au1_cur_row) 3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ZIP1 v28.8b, v13.8b, v14.8b 3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ZIP2 v14.8b, v13.8b, v14.8b 3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v13.8b, v28.8b 3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v18.8b, {v9.16b- v10.16b},v20.8b //vtbx4_u8(au1_cur_row_deint.val[1], band_table_v, vsub_u8(au1_cur_row_deint.val[1], band_pos_v)) 3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB v24.8b, v22.8b , v30.8b //vsub_u8(au1_cur_row_deint.val[1], band_pos_v) 4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST1 {v13.8b},[x5] //vst1q_u8(pu1_src_cpy, au1_cur_row) 4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS x10,x10,#4 //Decrement the ht loop count by 4 4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v21.8b, {v1.16b- v2.16b},v23.8b //vtbx4_u8(au1_cur_row_deint.val[0], band_table_u, vsub_u8(au1_cur_row_deint.val[0], band_pos_u)) 4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ZIP1 v28.8b, v17.8b, v18.8b 4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ZIP2 v18.8b, v17.8b, v18.8b 
4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v17.8b, v28.8b 4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar TBX v22.8b, {v9.16b- v10.16b},v24.8b //vtbx4_u8(au1_cur_row_deint.val[1], band_table_v, vsub_u8(au1_cur_row_deint.val[1], band_pos_v)) 4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST1 {v17.8b},[x6],x1 //vst1q_u8(pu1_src_cpy, au1_cur_row) 4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ZIP1 v28.8b, v21.8b, v22.8b 4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ZIP2 v22.8b, v21.8b, v22.8b 4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar mov v21.8b, v28.8b 4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x4,x6,x1 4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ST1 {v21.8b},[x7] //vst1q_u8(pu1_src_cpy, au1_cur_row) 4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD x5,x4,x1 4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE WIDTH_RESIDUE 4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarEND_LOOP: 4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar // LDMFD sp!,{x4-x12,x15} //Reload the registers from SP 4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldp x23, x24,[sp],#16 4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldp x21, x22,[sp],#16 4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ldp x19, x20,[sp],#16 4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pop_v_regs 4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ret 4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 431