10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/***************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* limitations under the License. 
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*****************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ihevc_sao_edge_offset_class3.s 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Contains function definitions for SAO edge offset (class 3). 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Functions are coded using NEON assembly and can be compiled using@ ARM 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* RVCT 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:author 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Parthiban V 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:par List of Functions: 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:remarks 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* None 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/ 390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar@void ihevc_sao_edge_offset_class3(UWORD8 *pu1_src, 400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 src_strd, 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_left, 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top, 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top_left, 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top_right, 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_bot_left, 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_avail, 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD8 *pi1_sao_offset, 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 wd, 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 ht) 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************Variables Vs Registers***************************************** 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r0 => *pu1_src 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r1 => src_strd 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r2 => *pu1_src_left 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r3 => *pu1_src_top 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r4 => *pu1_src_top_left 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r5 => *pu1_avail 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r6 => *pi1_sao_offset 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r7 => wd 590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r8=> ht 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text 620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.p2align 2 630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gi1_table_edge_idx 650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_sao_edge_offset_class3_a9q 660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_1: 680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl1 - 8 690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_2: 710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl2 - 8 720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_3: 740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl3 - 8 750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_sao_edge_offset_class3_a9q: 770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STMFD sp!,{r4-r12,r14} @stack stores the values of the arguments 800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x3C] @Loads wd 810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x40] @Loads ht 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,r7,#1 @wd - 1 840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x28] @Loads pu1_src_top_left 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r3,r9] @pu1_src_top[wd - 1] 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r9,r7 @Move width to r9 for loop count 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x34] @Loads pu1_avail 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r6,[sp,#0x38] @Loads pi1_sao_offset 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r3,[sp,#0x38] @Store pu1_src_top in sp 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB sp,sp,#0x94 @Decrement the stack pointer to store some temp arr values 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r10,[sp] @u1_src_top_left_tmp = pu1_src_top[wd - 1] 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r8,#1 @ht-1 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r11,r10,r1,r0 @pu1_src[(ht - 1) * src_strd + col] 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r12,sp,#0x02 @temp array 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_TOP_LOOP: 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D0,[r11]! @pu1_src[(ht - 1) * src_strd + col] 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r9,r9,#8 @Decrement the loop count by 8 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 D0,[r12]! 
@au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col] 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_TOP_LOOP 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_5_LOOP: 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r5,#5] @pu1_avail[5] 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r9,#0 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r7,#1 @[wd - 1] 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r0,r10] @u1_pos_0_0_tmp = pu1_src[wd - 1] 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_6_LOOP 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0xC0] @Load pu1_src_top_right from sp 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r10,#1 @[wd - 1 - 1] 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11] @pu1_src_top_right[0] 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r12,r9,r11 @pu1_src[wd - 1] - pu1_src_top_right[0] 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r0,r1 @pu1_src + src_strd 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r11,r10] @pu1_src[wd - 1 - 1 + src_strd] 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r12,#0 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r9,r14 @pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd] 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar MOVGT r12,#1 @SIGN(pu1_src[wd - 1] - pu1_src_top_right[0]) 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd]) 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_1 @table pointer 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl1: 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r12,r11 @SIGN(pu1_src[wd - 1] - pu1_src_top_right[0]) + SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd]) 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 @0 != edge_idx 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_6_LOOP 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r10,[r6,r12] @pi1_sao_offset[edge_idx] 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r9,r10 @pu1_src[0] + pi1_sao_offset[edge_idx] 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r9,#8,r9 @u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_6_LOOP: 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r5,#6] @pu1_avail[6] 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r8,#1 @ht - 1 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r0,[sp,#0xC0] @Store pu1_src in sp 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r12,r11,r1,r0 @pu1_src[(ht - 1) * src_strd] 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r12] @u1_pos_wd_ht_tmp = pu1_src[(ht - 1) * src_strd] 1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_3_LOOP 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14,[sp,#0xC4] @Load pu1_src_bot_left from sp 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r12,r1 @pu1_src[(ht - 1) * src_strd) - src_strd] 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r14] @Load pu1_src_bot_left[0] 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#1 @pu1_src[(ht - 1) * src_strd + 1 - src_strd] 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11] @Load pu1_src[(ht - 1) * src_strd + 1 - src_strd] 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r14,r10,r14 @pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0] 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r10,r11 @pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 1 - src_strd] 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 1 - src_strd]) 
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r14,#0 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r14,#0 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r14,#1 @SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]) 1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,r14 @Add 2 sign value 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_2 @table pointer 1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl2: 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_3_LOOP 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r6,r12] @pi1_sao_offset[edge_idx] 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r10,r10,r11 @pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx] 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r10,#8,r10 @u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_3_LOOP: 1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r2,[sp,#0xC4] @Store pu1_src_left in sp 1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r12,r8 
@Move ht 1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r14,r2 @Move pu1_src_left to pu1_src_left_cpy 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q0,#2 @const_2 = vdupq_n_s8(2) 1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r5,#3] @pu1_avail[3] 1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I16 Q1,#0 @const_min_clip = vdupq_n_s16(0) 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r12,r12,#1 @ht_tmp-- 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I16 Q2,#255 @const_max_clip = vdupq_n_u16((1 << bit_depth) - 1) 2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADDEQ r0,r0,r1 @pu1_src += src_strd 2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D7,[r6] @offset_tbl = vld1_s8(pi1_sao_offset) 2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r12,r12,#1 @ht_tmp-- 2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r6, gi1_table_edge_idx_addr_3 @table pointer 2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl3: 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r6,r6,pc 2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.S8 Q4,#0xFF @au1_mask = vdupq_n_s8(-1) 2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADDEQ r14,r14,#1 @pu1_src_left_cpy += 1 
2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r0,[sp,#0x90] @Store pu1_src in sp 2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D6,[r6] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r6,r7 @move wd to r6 loop_count 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#16 @Compare wd with 16 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT WIDTH_RESIDUE @If not jump to WIDTH_RESIDUE where loop is unrolled for 8 case 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#4 @Compare ht with 4 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLE WD_16_HT_4_LOOP @If jump to WD_16_HT_4_LOOP 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_LOOP_16: 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0xD0] @Loads wd 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @Loads pu1_avail 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @col == wd 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#16 @if(col == 16) 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SKIP_AU1_MASK_VAL 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#1] @pu1_avail[1] 
2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d9[7],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSKIP_AU1_MASK_VAL: 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#2] @pu1_avail[2] 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0xD4] @Loads ht 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r0,r1 @pu1_src - src_strd 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,r3 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x42 @*au1_src_left_tmp 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0xD0] @Loads wd 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#1 @pu1_src - src_strd + 1 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 @(wd - col) 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r8]! @pu1_top_row = vld1q_u8(pu1_src - src_strd + 1) 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 1) 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r3,r3,#16 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0xC0] @Loads *pu1_src 2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! 
@pu1_cur_row = vld1q_u8(pu1_src) 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,#15 @15 + (wd - col) 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)] 2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r5,#1 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP: 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r7],r1 @load the value and increment by src_strd 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r8,[r5,#1]! 
@store it in the stack pointer 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1 @I *pu1_src + src_strd 2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @I ht_tmp - row 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! @I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r5 @I pu1_src_left_cpy[ht_tmp - row] 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#1 @I pu1_src_left_cpy[ht_tmp - row + 1] 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r8] 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @I Loads pu1_avail 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D19[7],r8 @I vsetq_lane_u8 2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @I pu1_avail[2] 
2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#15 @I pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 15) 2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 @I 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE @I 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE: 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#15] @I pu1_src_cpy[15] 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r0,r1 @I pu1_src_cpy[16 - src_strd] 2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#16] @I load the value 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @I pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd] 3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 @I 3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 @I 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @I SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]) 3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r8 @I sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]), sign_up, 15) 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE: 3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q5,Q6,Q9 @I vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q9,Q6,Q9 @I vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q5,Q9,Q5 @I sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 
3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q0,Q7 @I edge_idx = vaddq_s8(const_2, sign_up) 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q9,Q5 @I edge_idx = vaddq_s8(edge_idx, sign_down) 3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D18,{D6},D18 @I vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q5 @I sign_up = vnegq_s8(sign_down) 3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#1 @I sign_up = vextq_s8(sign_up, sign_up, 1) 3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D19,{D6},D19 @I vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D12 @I pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q9,Q9,Q4 @I edge_idx = vandq_s8(edge_idx, au1_mask) 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D10,{D7},D18 @I offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx)) 3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q11,D13 @I pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D10 @I pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @I pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], 
const_min_clip) 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D11,{D7},D19 @I offset = vtbl1_s8(offset_tbl, vget_high_s8(edge_idx)) 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @I pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q11,Q11,D11 @I pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q11,Q11,Q1 @I pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q11,Q11,Q2 @I pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @I Decrement the ht_tmp loop count by 1 3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP: 3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1,LSL #1 @II *pu1_src + src_strd 3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 @I vmovn_s16(pi2_tmp_cur_row.val[0]) 3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @II ht_tmp - row 3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r4,r0,r1 @II pu1_src_cpy[16 - src_strd] 3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q11 @I vmovn_s16(pi2_tmp_cur_row.val[1]) 
3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r2,r8,r1 @III *pu1_src + src_strd 3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r4,#15] @II pu1_src_cpy[15] 3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! @II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @II Decrement the ht_tmp loop count by 1 3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r5 @II pu1_src_left_cpy[ht_tmp - row] 3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D30,[r2]! @III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D31,[r2] @III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r2,#8 3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r8,#1] 3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r4,[r0,#16] @II load the value 3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D19[7],r8 @II vsetq_lane_u8 3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r11,r4 @II pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd] 3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 @II 3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @I vst1q_u8(pu1_src_cpy, pu1_cur_row) 3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar SUB r5,r12,r7 @III ht_tmp - row 3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 @II 3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#15 @II pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 15) 3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @II SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]) 3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r5 @III pu1_src_left_cpy[ht_tmp - row] 3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r11 @II sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]), sign_up, 15) 3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 @III 3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE NEXT_ROW_ELSE_2 @III 3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @III Loads pu1_avail 3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#3] @III pu1_avail[3] 3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 @III 3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBNE r8,r2,#2 @III pu1_src_cpy[src_strd - 1] 3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_ELSE_2: 3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r8,#1] @III 3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q12,Q6,Q9 @II vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,r0,r1 3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r2,[r5,#15] @III 
pu1_src_cpy[15] 3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q13,Q6,Q9 @II vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r0,#16] @III load the value 3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r2,r2,r5 @III pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd] 3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q13,Q12 @II sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r2,#0 @III 3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r2,#0 @III 3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D19[7],r8 @III vsetq_lane_u8 3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r2,#1 @III SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]) 3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @III Decrement the ht_tmp loop count by 1 4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @II edge_idx = vaddq_s8(const_2, sign_up) 4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q12 @II sign_up = vnegq_s8(sign_down) 4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q15,#15 @III pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 15) 4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @II edge_idx = vaddq_s8(edge_idx, sign_down) 4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#1 @II sign_up = 
vextq_s8(sign_up, sign_up, 1) 4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D6},D26 @II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q5,Q8,Q9 @III vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r2 @III sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]), sign_up, 15) 4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D6},D27 @II vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q9,Q8,Q9 @III vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @II edge_idx = vandq_s8(edge_idx, au1_mask) 4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q5,Q9,Q5 @III sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D7},D26 @II offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx)) 4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q0,Q7 @III edge_idx = vaddq_s8(const_2, sign_up) 4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q9,Q5 @III edge_idx = vaddq_s8(edge_idx, sign_down) 4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 @II offset = vtbl1_s8(offset_tbl, vget_high_s8(edge_idx)) 4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q5 
@III sign_up = vnegq_s8(sign_down) 4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @II pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D18,{D6},D18 @III vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @II pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#1 @III sign_up = vextq_s8(sign_up, sign_up, 1) 4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D19,{D6},D19 @III vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q13,D13 @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q9,Q9,Q4 @III edge_idx = vandq_s8(edge_idx, au1_mask) 4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q13,Q13,D25 @II pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D10,{D7},D18 @III offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx)) 4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q13,Q13,Q1 @II pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D16 @III pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q13,Q13,Q2 @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D10 @III pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D11,{D7},D19 @III offset = vtbl1_s8(offset_tbl, vget_high_s8(edge_idx)) 4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @III pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q11,D17 @III pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @III pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D28,Q14 @II vmovn_s16(pi2_tmp_cur_row.val[0]) 4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q11,Q11,D11 @III pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D29,Q13 @II vmovn_s16(pi2_tmp_cur_row.val[1]) 4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q11,Q11,Q1 @III pi2_tmp_cur_row.val[1] = 
vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q15 @II pu1_cur_row = pu1_next_row 4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q11,Q11,Q2 @III pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 @III 4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q14},[r0],r1 @II vst1q_u8(pu1_src_cpy, pu1_cur_row) 4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT PU1_SRC_LOOP @If not equal jump to PU1_SRC_LOOP 4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT INNER_LOOP_DONE 4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1,LSL #1 @*pu1_src + src_strd 4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 @III vmovn_s16(pi2_tmp_cur_row.val[0]) 4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @Loads pu1_avail 4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#3] @pu1_avail[3] 4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q11 @III vmovn_s16(pi2_tmp_cur_row.val[1]) 4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r4,r0,r1 @pu1_src_cpy[16 - src_strd] 4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r0,#16] @load the value 4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ NEXT_ROW_ELSE_3 4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r8,#-1] @pu1_src_cpy[src_strd - 1] 4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar B NEXT_ROW_POINTER_ASSIGNED_3 4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_ELSE_3: 4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r12,r7 @ht_tmp - row 4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r11 @pu1_src_left_cpy[ht_tmp - row] 4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#1 @pu1_src_left_cpy[ht_tmp - row + 1] 4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r8] 4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_3: 4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r4,#15] @pu1_src_cpy[15] 4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D19[7],r8 @vsetq_lane_u8 4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r11,r5 @pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd] 4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#15 @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 15) 4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 
4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @III vst1q_u8(pu1_src_cpy, pu1_cur_row) 4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q12,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]) 5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q13,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r8 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]), sign_up, 15) 5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q13,Q12 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q11,D13 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @edge_idx = vaddq_s8(edge_idx, sign_down) 5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D6},D26 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D6},D27 @vtbl1_s8(edge_idx_tbl, 
vget_high_s8(edge_idx)) 5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D7},D26 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx)) 5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D24 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 @offset = vtbl1_s8(offset_tbl, vget_high_s8(edge_idx)) 5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q11,Q11,D25 @pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q11,Q11,Q1 @pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q11,Q11,Q2 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarINNER_LOOP_DONE: 5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 
@vmovn_s16(pi2_tmp_cur_row.val[0]) 5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0xD4] @Loads ht 5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q11 @vmovn_s16(pi2_tmp_cur_row.val[1]) 5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x42 @*au1_src_left_tmp 5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) 5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r2,[sp,#0xC4] @Loads *pu1_src_left 5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP: 5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#4 5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r2],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP 5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r6,r6,#16 @Decrement the wd loop count by 16 5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#8 @Check whether residue remains 5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT RE_ASSINING_LOOP @Jump to re-assigning loop 5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0xD0] @Loads wd 5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r0,[sp,#0x90] @Loads *pu1_src 5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r0,r0,r7 5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT WIDTH_LOOP_16 @If not equal jump to width_loop 
5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ WIDTH_RESIDUE @If residue remains jump to residue loop 5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWD_16_HT_4_LOOP: 5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @Loads pu1_avail 5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0xD0] @Loads wd 5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @col == wd 5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#16 @if(col == 16) 5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SKIP_AU1_MASK_VAL_WD_16_HT_4 5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#1] @pu1_avail[1] 5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d9[7],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSKIP_AU1_MASK_VAL_WD_16_HT_4: 5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#2] @pu1_avail[2] 5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 5720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r0,r1 @pu1_src - src_strd 5740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,r3 
5750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#1 @pu1_src - src_strd + 1 5760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r8]! @pu1_top_row = vld1q_u8(pu1_src - src_strd + 1) 5770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 1) 5780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 5790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r3,r3,#16 5810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x42 @*au1_src_left_tmp 5820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0xD4] @Loads ht 5830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0xD0] @Loads wd 5840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 @(wd - col) 5850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,#15 @15 + (wd - col) 5860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0xC0] @Loads *pu1_src 5870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)] 5880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r5,#1 5890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP_WD_16_HT_4: 5910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r7],r1 @load the value and increment by src_strd 5920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r8,[r5,#1]! 
@store it in the stack pointer 5930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 5940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP_WD_16_HT_4 5950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! @pu1_cur_row = vld1q_u8(pu1_src) 5970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 5980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 5990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 6010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 6020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 6030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 6040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 6050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP_WD_16_HT_4: 6070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1 @*pu1_src + src_strd 6080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 6090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 6100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 6110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @Loads pu1_avail 6120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#3] @pu1_avail[3] 6130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 6140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ NEXT_ROW_ELSE_WD_16_HT_4 6150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 6160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r8,#-1] @pu1_src_cpy[src_strd - 1] 6170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ NEXT_ROW_POINTER_ASSIGNED_WD_16_HT_4 6180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_ELSE_WD_16_HT_4: 6190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @ht_tmp - row 6200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r5 @pu1_src_left_cpy[ht_tmp - row] 6210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#1 @pu1_src_left_cpy[ht_tmp - row + 1] 6220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r8] 6230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_WD_16_HT_4: 6250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D19[7],r8 @vsetq_lane_u8 6260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#15 @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 15) 6270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,r12 6290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_WD_16_HT_4 
6300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @Loads pu1_avail 6310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 6320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 6330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE_WD_16_HT_4 6340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_WD_16_HT_4: 6360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#15] @pu1_src_cpy[15] 6370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,r0,#16 @pu1_src_cpy[16] 6380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r5,r1 @pu1_src_cpy[16 - src_strd] 6390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5] @load the value 6400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd] 6410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 6420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 6430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]) 6440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r8 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]), sign_up, 15) 6450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE_WD_16_HT_4: 6470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q10,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 6480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q11,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 6490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q11,Q10 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 
6500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 6520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @edge_idx = vaddq_s8(edge_idx, sign_down) 6530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D6},D26 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 6540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D6},D27 @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 6550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 6570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q12 @sign_up = vnegq_s8(sign_down) 6590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#1 @sign_up = vextq_s8(sign_up, sign_up, 1) 6600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D7},D26 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx)) 6620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 6630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 6640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 6650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 6660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
6670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 @offset = vtbl1_s8(offset_tbl, vget_high_s8(edge_idx)) 6680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q15,D13 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 6690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q15,Q15,D25 @pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 6700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q15,Q15,Q1 @pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 6710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q15,Q15,Q2 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 6720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D28,Q14 @vmovn_s16(pi2_tmp_cur_row.val[0]) 6740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D29,Q15 @vmovn_s16(pi2_tmp_cur_row.val[1]) 6750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q14},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) 6770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 @pu1_cur_row = pu1_next_row 6790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#1 @Decrement the ht_tmp loop count by 1 6800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE PU1_SRC_LOOP_WD_16_HT_4 @If not equal jump to PU1_SRC_LOOP_WD_16_HT_4 6810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0xD4] @Loads ht 6830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x42 @*au1_src_left_tmp 6840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar LDR r2,[sp,#0xC4] @Loads *pu1_src_left 6850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP_WD_16_HT_4: 6860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 6870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r2],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 6880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#4 6890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP_WD_16_HT_4 6900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r6,r6,#16 @Decrement the wd loop count by 16 6920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLE RE_ASSINING_LOOP @Jump to re-assigning loop 6930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT WD_16_HT_4_LOOP @If not equal jump to width_loop 6940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_RESIDUE: 6970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0xD0] @Loads wd 6980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @Loads pu1_avail 6990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @wd_residue == wd 7000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 7010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 7030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 7040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#1] @pu1_avail[1] 7060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[7],r8 @au1_mask = 
vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 7070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_2_RESIDUE: 7090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#2] @pu1_avail[2] 7100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 7110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r0,r1 @pu1_src - src_strd 7130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,r3 7140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#1 @pu1_src - src_strd + 1 7150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r8]! @pu1_top_row = vld1q_u8(pu1_src - src_strd + 1) 7160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 1) 7170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 7180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x42 @*au1_src_left_tmp 7210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0xD4] @Loads ht 7220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0xD0] @Loads wd 7230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0xC0] @Loads *pu1_src 7240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @(wd - 1) 7250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + (wd - 1)] 7260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r5,#1 7270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP_RESIDUE: 7290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB 
r8,[r7],r1 @load the value and increment by src_strd 7300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r8,[r5,#1]! @store it in the stack pointer 7310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 7320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP_RESIDUE 7330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! @pu1_cur_row = vld1q_u8(pu1_src) 7350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 7360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 7370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 7390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 7400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 7410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 7420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP_RESIDUE: 7440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 7450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1 @*pu1_src + src_strd 7460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 7470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 7480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 7490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @Loads pu1_avail 7500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#3] @pu1_avail[3] 7510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 7520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ NEXT_ROW_ELSE_RESIDUE 7530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 7540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r8,#-1] @pu1_src_cpy[src_strd - 1] 7550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ NEXT_ROW_POINTER_ASSIGNED_RESIDUE 7560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_ELSE_RESIDUE: 7570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @ht_tmp - row 7580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r5 @pu1_src_left_cpy[ht_tmp - row] 7590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#1 @pu1_src_left_cpy[ht_tmp - row + 1] 7600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r8] 7610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_RESIDUE: 7630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D19[7],r8 @vsetq_lane_u8 7640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#15 @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 15) 7650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,r12 7670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_RESIDUE 
7680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0xC8] @Loads pu1_avail 7690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 7700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 7710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE_RESIDUE 7720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_RESIDUE: 7740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#15] @pu1_src_cpy[15] 7750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,r0,#16 @pu1_src_cpy[16] 7760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r5,r1 @pu1_src_cpy[16 - src_strd] 7770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5] @load the value 7780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd] 7790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 7800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 7810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]) 7820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r8 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] - pu1_src_cpy[16 - src_strd]), sign_up, 15) 7830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE_RESIDUE: 7850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q10,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 7860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q11,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 7870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q11,Q10 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 
7880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 7900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @edge_idx = vaddq_s8(edge_idx, sign_down) 7910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D6},D26 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 7920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D6},D27 @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 7930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 7950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q12 @sign_up = vnegq_s8(sign_down) 7970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#1 @sign_up = vextq_s8(sign_up, sign_up, 1) 7980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D7},D26 @offset = vtbl1_s8(offset_tbl, vget_low_s8(edge_idx)) 8000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 8010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 8020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 8030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 8040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
8050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D30,Q14 @vmovn_s16(pi2_tmp_cur_row.val[0]) 8060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {D30},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) 8080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 @pu1_cur_row = pu1_next_row 8090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#1 8100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE PU1_SRC_LOOP_RESIDUE 8110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0xD4] @Loads ht 8130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r2,[sp,#0xC4] @Loads *pu1_src_left 8140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x42 @*au1_src_left_tmp 8150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP_RESIDUE: 8170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 8180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#4 8190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r2],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 8200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP_RESIDUE 8210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarRE_ASSINING_LOOP: 8240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0xD0] @Loads wd 8250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r0,[sp,#0xC0] @Loads *pu1_src 8260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0xD4] @Loads ht 
8280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r7 @pu1_src[wd] 8290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0xBC] @Loads pu1_src_top_left 8310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r11,#1 @ht - 1 8320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r9,[r8,#-1] @pu1_src_org[wd - 1] = u1_pos_wd_0_tmp 8340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r6,r11,r1,r0 @pu1_src_org[(ht - 1) * src_strd] 8350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[sp] @load u1_src_top_left_tmp from stack pointer 8370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r12,sp,#0x02 8380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r10,[r6] @pu1_src_org[wd - 1 + (ht - 1) * src_strd] = u1_pos_wd_ht_tmp 8400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r8,[r4] @*pu1_src_top_left = u1_src_top_left_tmp 8410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r3,[sp,#0xCC] @Loads pu1_src_top 8420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_TOP_LOOP: 8440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D0,[r12]! @pu1_src_top[col] = au1_src_top_tmp[col] 8450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#8 @Decrement the width 8460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 D0,[r3]! 
@pu1_src_top[col] = au1_src_top_tmp[col] 8470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_TOP_LOOP 8480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarEND_LOOPS: 8500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD sp,sp,#0x94 8510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDMFD sp!,{r4-r12,r15} @Reload the registers from SP 8520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 855