10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/***************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* limitations under the License. 
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*****************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ihevc_sao_edge_offset_class3_chroma.s 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Contains function definitions for SAO edge offset (class 3) filtering of chroma. 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Functions are coded using NEON assembly instructions and can be compiled using ARM 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* RVCT 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:author 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Parthiban V 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:par List of Functions: 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ,:remarks 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* None 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/ 390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar@void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src, 400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 src_strd, 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_left, 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top, 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top_left, 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top_right, 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_bot_left, 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_avail, 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD8 *pi1_sao_offset_u, 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD8 *pi1_sao_offset_v, 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 wd, 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 ht) 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************Variables Vs Registers***************************************** 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r0 => *pu1_src 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r1 => src_strd 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r2 => *pu1_src_left 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r3 => *pu1_src_top 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r4 => *pu1_src_top_left 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r5 => *pu1_avail 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r6 => *pi1_sao_offset_u 590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r9 => *pi1_sao_offset_v 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r7 => wd 610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r8=> ht 620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text 640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.p2align 2 650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gi1_table_edge_idx 670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_sao_edge_offset_class3_chroma_a9q 680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_1: 700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl1 - 8 710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_2: 730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl2 - 8 740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_3: 760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl3 - 8 770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_4: 790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl4 - 8 800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_5: 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl5 - 8 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_sao_edge_offset_class3_chroma_a9q: 850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STMFD sp!,{r4-r12,r14} @stack stores 
the values of the arguments 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x40] @Loads wd 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x44] @Loads ht 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,r7,#2 @wd - 2 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x28] @Loads pu1_src_top_left 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r10,[r3,r9] @pu1_src_top[wd - 2] 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r9,r7 @Move width to r9 for loop count 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x34] @Loads pu1_avail 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r6,[sp,#0x38] @Loads pi1_sao_offset_u 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r3,[sp,#0x38] @Store pu1_src_top in sp 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB sp,sp,#0xD4 @Decrement the stack pointer to store some temp arr values 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r10,[sp] @u1_src_top_left_tmp = pu1_src_top[wd - 2] 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r8,#1 @ht-1 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r11,r10,r1,r0 @pu1_src[(ht - 1) * src_strd + col] 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r12,sp,#10 @temp array 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_TOP_LOOP: 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar VLD1.8 D0,[r11]! @pu1_src[(ht - 1) * src_strd + col] 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r9,r9,#8 @Decrement the loop count by 8 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 D0,[r12]! @au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col] 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_TOP_LOOP 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_5_LOOP_U: 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r5,#5] @pu1_avail[5] 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r9,#0 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r14,r7,#2 @[wd - 2] 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r0,r14] @u1_pos_0_0_tmp_u = pu1_src[wd - 2] 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r7,#1 @[wd - 1] 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r0,r11] @u1_pos_0_0_tmp_v = pu1_src[wd - 1] 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_6_LOOP_U 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x100] @Load pu1_src_top_right from sp 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11] @pu1_src_top_right[0] 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r12,r9,r11 @pu1_src[wd - 2] - pu1_src_top_right[0] 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r12,#0 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r12,#1 @SIGN(pu1_src[wd - 2] - pu1_src_top_right[0]) 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r0,r1 @pu1_src + src_strd 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar SUB r14,r14,#2 @[wd - 2 - 2] 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r11,r14] @pu1_src[wd - 2 - 2 + src_strd] 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r9,r14 @pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd] 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd]) 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r12,r11 @SIGN(pu1_src[wd - 2] - pu1_src_top_right[0]) + SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd]) 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_1 @table pointer 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl1: 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 @0 != edge_idx 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_5_LOOP_V 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r6,r12] @pi1_sao_offset_u[edge_idx] 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r9,r11 @pu1_src[wd - 2] + pi1_sao_offset_u[edge_idx] 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r9,#8,r9 @u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_5_LOOP_V: 
1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x100] @Load pu1_src_top_right from sp 1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11,#1] @pu1_src_top_right[1] 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r12,r10,r11 @pu1_src[wd - 1] - pu1_src_top_right[1] 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r12,#0 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r12,#1 @SIGN(pu1_src[wd - 1] - pu1_src_top_right[1]) 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r0,r1 @pu1_src + src_strd 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r14,r7,#3 @[wd - 1 - 2] 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r11,r14] @pu1_src[wd - 1 - 2 + src_strd] 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r10,r14 @pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd] 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd]) 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r12,r11 @SIGN(pu1_src[wd - 1] - pu1_src_top_right[1]) + SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd]) 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_2 @table pointer 1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl2: 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 @0 != edge_idx 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_6_LOOP_U 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x110] @Loads pi1_sao_offset_v 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r11,r12] @pi1_sao_offset_v[edge_idx] 1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r10,r10,r11 @pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx] 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r10,#8,r10 @u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_6_LOOP_U: 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r9,[sp,#6] 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r10,[sp,#7] 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r0,[sp,#0x100] @Store pu1_src in sp 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r5,#6] @pu1_avail[6] 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r8,#1 @ht - 1 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r12,r11,r1,r0 @pu1_src[(ht - 1) * src_strd] 1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r12] @u1_pos_wd_ht_tmp_u = pu1_src[(ht - 1) * src_strd] 1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r12,#1] @u1_pos_wd_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1] 1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_3_LOOP 
1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r12,r1 @pu1_src[(ht - 1) * src_strd - src_strd] 1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @pu1_src[(ht - 1) * src_strd + 2 - src_strd] 1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11] @Load pu1_src[(ht - 1) * src_strd + 2 - src_strd] 1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r10,r11 @pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd] 1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd]) 1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14,[sp,#0x104] @Load pu1_src_bot_left from sp 2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r14] @Load pu1_src_bot_left[0] 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r14,r10,r14 @pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0] 2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r14,#0 2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r14,#0 2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r14,#1 @SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]) 2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,r14 @Add 2 sign value 2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_3 @table pointer 2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakarulbl3: 2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r14,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r14,#0 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_6_LOOP_V 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r6,r14] @pi1_sao_offset_u[edge_idx] 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r10,r10,r11 @pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx] 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r10,#8,r10 @u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_6_LOOP_V: 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r12,r12,#1 @pu1_src[(ht - 1) * src_strd + 1] 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r12,r1 @pu1_src[(ht - 1) * src_strd + 1) - src_strd] 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @pu1_src[(ht - 1) * src_strd + 2 - src_strd] 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11] @Load pu1_src[(ht - 1) * src_strd + 2 - src_strd] 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r9,r11 @pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd] 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd]) 
2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14,[sp,#0x104] @Load pu1_src_bot_left from sp 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r14,#1] @Load pu1_src_bot_left[1] 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r14,r9,r14 @pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1] 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r14,#0 2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r14,#0 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r14,#1 @SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1]) 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,r14 @Add 2 sign value 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_4 @table pointer 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl4: 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_3_LOOP 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14,[sp,#0x110] @Loads pi1_sao_offset_v 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r14,r12] @pi1_sao_offset_v[edge_idx] 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r9,r11 @pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx] 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r9,#8,r9 @u1_pos_wd_ht_tmp_v = 
CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_3_LOOP: 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r10,[sp,#8] 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r9,[sp,#9] 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r2,[sp,#0x104] @Store pu1_src_left in sp 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r12,r8 @Move ht 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r14,r2 @Move pu1_src_left to pu1_src_left_cpy 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r5,#3] @pu1_avail[3] 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE PU1_AVAIL_2_LOOP 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r12,r12,#1 @ht_tmp-- 2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_2_LOOP: 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE PU1_AVAIL_2_LOOP_END 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r0,r0,r1 @pu1_src += src_strd 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r12,r12,#1 @ht_tmp-- 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r14,r14,#2 @pu1_src_left_cpy += 2 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_2_LOOP_END: 
2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r0,[sp,#2] @Store pu1_src in sp 2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q0,#2 @const_2 = vdupq_n_s8(2) 2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I16 Q1,#0 @const_min_clip = vdupq_n_s16(0) 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I16 Q2,#255 @const_max_clip = vdupq_n_u16((1 << bit_depth) - 1) 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D6,[r6] @offset_tbl_u = vld1_s8(pi1_sao_offset_u) 2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r6,[sp,#0x110] @Loads pi1_sao_offset_v 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D7,[r6] @offset_tbl_v = vld1_s8(pi1_sao_offset_v) 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r2, gi1_table_edge_idx_addr_5 @table pointer 2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl5: 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r2,r2,pc 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar @VLD1.8 D6,[r6] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.S8 Q4,#0xFF @au1_mask = vdupq_n_s8(-1) 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r6,r7 @move wd to r6 loop_count 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#16 @Compare wd with 16 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT WIDTH_RESIDUE @If not jump to WIDTH_RESIDUE where loop is unrolled for 8 case 2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#4 @Compare ht with 4 2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLE WD_16_HT_4_LOOP @If jump to WD_16_HT_4_LOOP 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
MahendrakarWIDTH_LOOP_16: 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @col == wd 2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r5,#2] @pu1_avail[2] 3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#16 @if(col == 16) 3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D8[1],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SKIP_AU1_MASK_VAL 3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#1] @pu1_avail[1] 3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D9[6],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D9[7],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSKIP_AU1_MASK_VAL: 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! 
@pu1_cur_row = vld1q_u8(pu1_src) 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r0,r1 @pu1_src - src_strd 3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,r3 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#2 @pu1_src - src_strd + 2 3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r8]! @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) 3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r3,r3,#16 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x118] @Loads ht 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 @(wd - col) 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,#14 @15 + (wd - col) 3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x100] @Loads *pu1_src 3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)] 3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP: 3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r8,[r7] @load the value and increment by src_strd 3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r8,[r5],#2 @store it in the stack pointer 3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,r1 3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP 3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 @I 3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r0,r1 @I *pu1_src + src_strd 3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @I ht_tmp - row 3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r11]! 
@I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r11] @I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,#8 3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r5,LSL #1 @I pu1_src_left_cpy[(ht_tmp - row) * 2] 3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r8,#2] @I 3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D19[3],r5 @I vsetq_lane_u8 3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x108] @I Loads pu1_avail 3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11,#2] @I pu1_avail[2] 3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#14 @I pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14) 3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 @I 3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE @I 3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#14] @I pu1_src_cpy[14] 3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r0,r1 @I 3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r5,#16] @I load the value pu1_src_cpy[16 - src_strd] 3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r0,#15] @I pu1_src_cpy[15] 3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r11 @I pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd] 3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r5,#17] @I load the value pu1_src_cpy[17 - src_strd] 3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 @I 3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 @I 3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,r9,r10 @I pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @I SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]) 3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r9,#0 @I 3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r9,#0 @I 3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[6],r8 @I sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 0) 3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r9,#1 @I SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r9 @I sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 1) 3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE: 3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D28,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q10,Q6,Q9 @I vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q11,Q6,Q9 @I vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 
3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q11,Q11,Q10 @I sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q0,Q7 @I edge_idx = vaddq_s8(const_2, sign_up) 3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q9,Q11 @I edge_idx = vaddq_s8(edge_idx, sign_down) 4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D18,{D28},D18 @I vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q11 @I sign_up = vnegq_s8(sign_down) 4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D19,{D28},D19 @I vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#2 @I sign_up = vextq_s8(sign_up, sign_up, 2) 4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D12 @I pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q9,Q9,Q4 @I edge_idx = vandq_s8(edge_idx, au1_mask) 4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D18,D19 @I 4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D22,{D6},D18 @I 4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D23,{D7},D19 @I 4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D22,D23 @I 4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q9,D13 @I pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 
4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D22 @I pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @I pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @I pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 @I pu1_cur_row = pu1_next_row 4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q9,Q9,D23 @I pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @I Decrement the ht_tmp loop count by 1 4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q9,Q9,Q1 @I pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q9,Q9,Q2 @I pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP: 4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r0,r1,LSL #1 @II *pu1_src + src_strd 4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 @I vmovn_s16(pi2_tmp_cur_row.val[0]) 4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @II 
ht_tmp - row 4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r4,r0,r1 @III *pu1_src + src_strd 4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q9 @I vmovn_s16(pi2_tmp_cur_row.val[1]) 4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r5,LSL #1 @II pu1_src_left_cpy[(ht_tmp - row) * 2] 4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r9,[r8,#2] 4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r11]! @II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r11] @II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,#8 4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r4,#14] @II pu1_src_cpy[14] 4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r4,#15] @II pu1_src_cpy[15] 4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D29[3],r9 @II vsetq_lane_u8 4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r4,r11,r1 @III *pu1_src + src_strd 4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r0,#17] @II load the value pu1_src_cpy[17 - src_strd] 4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D30,[r4]! 
@III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D31,[r4] @III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r4,#8 4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r0,#16] @II load the value pu1_src_cpy[16 - src_strd] 4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @II Decrement the ht_tmp loop count by 1 4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @I vst1q_u8(pu1_src_cpy, pu1_cur_row) 4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r10,r11 @II pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd] 4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 @II 4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q14,Q14,Q8,#14 @II pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14) 4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @II pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r10,#0 @II 4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D21,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r10,#1 @II SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]) 4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 @II 4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[6],r10 @II sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 0) 4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
MVNLT r8,#0 @II 4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @II SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r12,r7 @III ht_tmp - row 4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r8 @II sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 1) 4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r14,r10,LSL #1 @III pu1_src_left_cpy[(ht_tmp - row) * 2] 4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 @III 4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q11,Q6,Q14 @II vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE NEXT_ROW_POINTER_ASSIGNED_2 @III 4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @III Loads pu1_avail 4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#3] @III pu1_avail[3] 4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 @III 4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBNE r11,r4,#4 @III pu1_src[src_strd - 2] 4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_2: 4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r11,#2] @III 4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q12,Q6,Q14 @II vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r0,r1 @III 4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r11,#14] @III 
pu1_src_cpy[14] 4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D19[3],r5 @III vsetq_lane_u8 4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r11,#15] @III pu1_src_cpy[15] 4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r0,#16] @III load the value pu1_src_cpy[16 - src_strd] 4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q12,Q11 @II sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r0,#17] @III load the value pu1_src_cpy[17 - src_strd] 4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,r9,r11 @III pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd] 4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q15,#14 @III pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14) 4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r8,r10 @III pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r9,#0 @III 5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @II edge_idx = vaddq_s8(const_2, sign_up) 5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r9,#0 @III 5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r9,#1 @III SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]) 5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @II edge_idx = vaddq_s8(edge_idx, sign_down) 5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 @III 5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar VNEG.S8 Q7,Q12 @II sign_up = vnegq_s8(sign_down) 5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D21},D26 @II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r10,#0 @III 5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r10,#1 @III SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#2 @II sign_up = vextq_s8(sign_up, sign_up, 2) 5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D21},D27 @II vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q11,Q8,Q9 @III vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[6],r9 @III sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 0) 5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @II edge_idx = vandq_s8(edge_idx, au1_mask) 5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r10 @III sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 1) 5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D26,D27 @II 5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q10,Q8,Q9 @III vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D6},D26 @II 5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q11,Q10,Q11 @III sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q0,Q7 @III edge_idx = vaddq_s8(const_2, sign_up) 5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 @II 5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q9,Q11 @III edge_idx = vaddq_s8(edge_idx, sign_down) 5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D20,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D24,D25 @II 5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D18,{D20},D18 @III vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q11 @III sign_up = vnegq_s8(sign_down) 5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @II pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D19,{D20},D19 @III vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#2 @III sign_up = vextq_s8(sign_up, sign_up, 2) 5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q13,D13 @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q9,Q9,Q4 @III edge_idx = vandq_s8(edge_idx, au1_mask) 5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D16 @III pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D18,D19 @III 5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @II pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D22,{D6},D18 @III 5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q13,Q13,D25 @II pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D23,{D7},D19 @III 5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q13,Q13,Q1 @II pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q9,D17 @III pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D22,D23 @III 5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D28,Q14 @II vmovn_s16(pi2_tmp_cur_row.val[0]) 5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D22 @III pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
VMOV Q6,Q15 @III pu1_cur_row = pu1_next_row 5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q13,Q13,Q2 @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @III Decrement the ht_tmp loop count by 1 5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @III pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 @III 5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D29,Q13 @II vmovn_s16(pi2_tmp_cur_row.val[1]) 5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @III pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 5720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q9,Q9,D23 @III pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 5740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q9,Q9,Q1 @III pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 5760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q14},[r0],r1 @II vst1q_u8(pu1_src_cpy, pu1_cur_row) 5780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q9,Q9,Q2 @III pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 5790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar BGT PU1_SRC_LOOP @If not equal jump to PU1_SRC_LOOP 5810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT INNER_LOOP_DONE 5820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r0,r1,LSL #1 @*pu1_src + src_strd 5850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 @III vmovn_s16(pi2_tmp_cur_row.val[0]) 5860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @ht_tmp - row 5870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r5,LSL #1 @pu1_src_left_cpy[(ht_tmp - row) * 2] 5890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q9 @III vmovn_s16(pi2_tmp_cur_row.val[1]) 5900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 5910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r4,[r0,#16] @load the value pu1_src_cpy[16 - src_strd] 5930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r11]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 5940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r11] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 5950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,#8 5960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r0,#17] @load the value pu1_src_cpy[17 - src_strd] 5970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE NEXT_ROW_POINTER_ASSIGNED_3 5990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 6000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#3] @pu1_avail[3] 6010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 6020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBNE r8,r11,#4 @pu1_src[src_strd - 2] 6030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_3: 6050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r8,#2] 6060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @III vst1q_u8(pu1_src_cpy, pu1_cur_row) 6070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#14] @pu1_src_cpy[14] 6080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r4 @pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd] 6100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D19[3],r5 @vsetq_lane_u8 6110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r0,#15] @pu1_src_cpy[15] 6120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 6140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#14 @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14) 
6150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r10,r9 @pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 6160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 6180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D28,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 6190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]) 6200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 6220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[6],r8 @sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 0) 6230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r10,#0 6240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r10,#1 @SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 6260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r10 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 1) 6270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q10,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 6280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q11,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 6300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q11,Q11,Q10 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 6310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 6330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q9,Q11 @edge_idx = vaddq_s8(edge_idx, sign_down) 
6340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D18,{D28},D18 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 6350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D19,{D28},D19 @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 6360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q9,Q9,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 6380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 6400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D18,D19 6410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D22,{D6},D18 6430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D23,{D7},D19 6440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q9,D13 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 6460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D22,D23 6470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D22 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 6490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 6500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 6510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q9,Q9,D23 
@pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 6530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q9,Q9,Q1 @pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 6540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q9,Q9,Q2 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 6550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarINNER_LOOP_DONE: 6580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x118] @Loads ht 6600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 @III vmovn_s16(pi2_tmp_cur_row.val[0]) 6610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 6620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL r8,r8,#1 6640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q9 @III vmovn_s16(pi2_tmp_cur_row.val[1]) 6650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x104] @Loads *pu1_src_left 6660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP: 6680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 6690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#4 6700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r11],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 6710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP 6720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS 
r6,r6,#16 @Decrement the wd loop count by 16 6740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @III vst1q_u8(pu1_src_cpy, pu1_cur_row) 6750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#8 @Check whether residue remains 6760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT RE_ASSINING_LOOP @Jump to re-assigning loop 6780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 6790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r0,[sp,#0x02] @Loads *pu1_src 6800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 6810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r0,r0,r7 6820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT WIDTH_LOOP_16 @If not equal jump to width_loop 6830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ WIDTH_RESIDUE @If residue remains jump to residue loop 6840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWD_16_HT_4_LOOP: 6860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 6870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 6890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @col == wd 6900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 6920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 6930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 6940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#16 @if(col == 16) 
6960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D8[1],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 6970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SKIP_AU1_MASK_VAL_WD_16_HT_4 6990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#1] @pu1_avail[1] 7000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D9[6],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 7010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D9[7],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 7020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSKIP_AU1_MASK_VAL_WD_16_HT_4: 7040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r5,#2] @pu1_avail[2] 7050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r0,r1 @pu1_src - src_strd 7060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 7080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,r3 7090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! @pu1_cur_row = vld1q_u8(pu1_src) 7100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 7110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 7120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#2 @pu1_src - src_strd + 2 7130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r3,r3,#16 7150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r8]! 
@pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) 7160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) 7170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 7180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 7190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x118] @Loads ht 7210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 7220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 7230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 @(wd - col) 7250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 7260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,#14 @15 + (wd - col) 7270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x100] @Loads *pu1_src 7290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 7300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)] 7310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP_WD_16_HT_4: 7330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r8,[r7] @load the value and increment by src_strd 7340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 7350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r8,[r5],#2 @store it in the 
stack pointer 7370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,r1 7380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP_WD_16_HT_4 7390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 7410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 7420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP_WD_16_HT_4: 7440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r0,r1 @*pu1_src + src_strd 7450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 7470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r9]! @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 7480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r9] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 7490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,#8 7500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#3] @pu1_avail[3] 7510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r12,r7 @ht_tmp - row 7530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r11,LSL #1 @pu1_src_left_cpy[(ht_tmp - row) * 2] 7540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#2 @pu1_src_left_cpy[(ht_tmp - row + 1) * 2] 7550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 7570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ NEXT_ROW_POINTER_ASSIGNED_WD_16_HT_4 7580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 7590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar SUBEQ r8,r9,#2 @pu1_src[src_strd - 2] 7600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_WD_16_HT_4: 7620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r8] 7630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D19[3],r5 @vsetq_lane_u8 7640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#14 @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14) 7650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,r12 7670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT SIGN_UP_CHANGE_WD_16_HT_4 7680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 7690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 7700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 7710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE_WD_16_HT_4 7720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_WD_16_HT_4: 7740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#14] @pu1_src_cpy[14] 7750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,r0,r1 7760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#16] @load the value pu1_src_cpy[16 - src_strd] 7780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r0,#15] @pu1_src_cpy[15] 7800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd] 7810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
7820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r9,#17] @load the value pu1_src_cpy[17 - src_strd] 7830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 7840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 7860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r10,r11 @pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 7870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]) 7890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 7910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[6],r8 @sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 0) 7920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r10,#0 7930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r10,#1 @SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 7950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r10 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 1) 7960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE_WD_16_HT_4: 7980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D20,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 7990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q11,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 8000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q12,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 
8020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q12,Q11 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 8030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 8050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @edge_idx = vaddq_s8(edge_idx, sign_down) 8060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q12 @sign_up = vnegq_s8(sign_down) 8080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D20},D26 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 8090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D20},D27 @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 8110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#2 @sign_up = vextq_s8(sign_up, sign_up, 2) 8120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 8140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 8150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D26,D27 8180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D6},D26 8190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 8200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D24,D25 8210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q15,D13 
@pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 8230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 8240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 8260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 8270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 @pu1_cur_row = pu1_next_row 8290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q15,Q15,D25 @pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 8300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q15,Q15,Q1 @pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 8320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q15,Q15,Q2 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 8330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D28,Q14 @vmovn_s16(pi2_tmp_cur_row.val[0]) 8350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D29,Q15 @vmovn_s16(pi2_tmp_cur_row.val[1]) 8360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#1 @Decrement the ht_tmp loop count by 1 8380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q14},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) 
8390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE PU1_SRC_LOOP_WD_16_HT_4 @If not equal jump to PU1_SRC_LOOP_WD_16_HT_4 8400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x118] @Loads ht 8420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 8430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x104] @Loads *pu1_src_left 8440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP_WD_16_HT_4: 8460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 8470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#2 8480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r11],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 8490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP_WD_16_HT_4 8500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r6,r6,#16 @Decrement the wd loop count by 16 8520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLE RE_ASSINING_LOOP @Jump to re-assigning loop 8530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT WD_16_HT_4_LOOP @If not equal jump to width_loop 8540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_RESIDUE: 8560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 8570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 8590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @wd_residue == wd 8600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
8610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 8620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 8640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r5,#1] @pu1_avail[1] 8650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r5,#2] @pu1_avail[2] 8670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 8680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r9,#0 8690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r10,r0,r1 @pu1_src - src_strd 8710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[1],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 8720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r10,r3 8730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r10,r10,#2 @pu1_src - src_strd + 2 8750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[6],r11 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 8760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 8770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x118] @Loads ht 8790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[7],r11 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 8800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 8810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x100] @Loads *pu1_src 
8830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r10]! @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) 8840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r10] @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) 8850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,#8 8860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#2 @(wd - 2) 8870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + (wd - 2)] 8890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP_RESIDUE: 8910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r8,[r7] @load the value and increment by src_strd 8920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,r1 8930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r8,[r5],#2 @store it in the stack pointer 8940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 8950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP_RESIDUE 8960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! 
@pu1_cur_row = vld1q_u8(pu1_src) 8980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 8990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 9000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 9020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 9030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 9050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 9060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 9070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP_RESIDUE: 9090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r0,r1 @*pu1_src + src_strd 9100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r12,r7 @ht_tmp - row 9120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r9]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 9130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r9] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 9140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,#8 9150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 9160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#3] @pu1_avail[3] 9180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r14,r11,LSL #1 @pu1_src_left_cpy[(ht_tmp - row) * 2] 9190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 9210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#2 @pu1_src_left_cpy[(ht_tmp - row + 1) * 2] 9220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ NEXT_ROW_POINTER_ASSIGNED_RESIDUE 9240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 9250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r9,#2 @pu1_src[src_strd - 2] 9260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_RESIDUE: 9280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r8] 9290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r8,#1] 9310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D19[6],r5 @vsetq_lane_u8 9320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,r12 9330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D19[7],r8 @vsetq_lane_u8 9350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q9,Q8,#14 
@pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14) 9360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT SIGN_UP_CHANGE_RESIDUE 9380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 9390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 9400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 9410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE_RESIDUE 9420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_RESIDUE: 9440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#14] @pu1_src_cpy[14] 9450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,r0,r1 9460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#16] @load the value pu1_src_cpy[16 - src_strd] 9480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r0,#15] @pu1_src_cpy[15] 9500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd] 9510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r9,#17] @load the value pu1_src_cpy[17 - src_strd] 9530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 9540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 9560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r10,r11 @pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 9570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
MOVGT r8,#1 @SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]) 9590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 9610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[6],r8 @sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 0) 9620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r10,#0 9630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r10,#1 @SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd] 9650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D15[7],r10 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 1) 9660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE_RESIDUE: 9680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D20,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 9690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q11,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 9700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q12,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 9720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q12,Q11 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 9730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 9750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @edge_idx = vaddq_s8(edge_idx, sign_down) 9760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q12 @sign_up = 
vnegq_s8(sign_down) 9780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D20},D26 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 9790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D20},D27 @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 9810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#2 @sign_up = vextq_s8(sign_up, sign_up, 14) 9820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 9840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 9850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D26,D27 9880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D6},D26 9890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 9900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D24,D25 9910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 @pu1_cur_row = pu1_next_row 9930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 9940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 9960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 
9970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#1 @Decrement the ht_tmp loop count by 1 9990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D30,Q14 @vmovn_s16(pi2_tmp_cur_row.val[0]) 10000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {D30},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) 10020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE PU1_SRC_LOOP_RESIDUE @If not equal jump to PU1_SRC_LOOP 10040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x118] @Loads ht 10060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 10070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x104] @Loads *pu1_src_left 10090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP_RESIDUE: 10110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 10120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#2 10130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r11],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 10140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP_RESIDUE 10150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarRE_ASSINING_LOOP: 10180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 10190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x118] 
@Loads ht 10200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r0,[sp,#0x100] @Loads *pu1_src 10220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r7,#2 @wd - 2 10230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r9,[sp,#6] 10250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,#1 @ht - 1 10260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r9,[r0,r10] @pu1_src_org[0] = u1_pos_0_0_tmp 10280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r6,r8,r1,r0 @pu1_src[(ht - 1) * src_strd] 10290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0xFC] @Loads pu1_src_top_left 10310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r9,[sp,#8] 10330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r12,sp,#10 10340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r9,[r6] @pu1_src_org[(ht - 1) * src_strd] = u1_pos_wd_ht_tmp_u 10360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r10,[sp] @load u1_src_top_left_tmp from stack pointer 10380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r10,[r4] @*pu1_src_top_left = u1_src_top_left_tmp 10390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r3,[sp,#0x10C] @Loads pu1_src_top 10400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_TOP_LOOP: 10420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D0,[r12]! 
@pu1_src_top[col] = au1_src_top_tmp[col] 10430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#8 @Decrement the width 10440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 D0,[r3]! @pu1_src_top[col] = au1_src_top_tmp[col] 10450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_TOP_LOOP 10460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarEND_LOOPS: 10480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD sp,sp,#0xD4 10490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDMFD sp!,{r4-r12,r15} @Reload the registers from SP 10500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1053