10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/***************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* limitations under the License. 
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*****************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* ihevc_sao_edge_offset_class2_chroma.s 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Contains the function definition for the class 2 SAO edge offset of chroma. 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Functions are coded using NEON assembly and can be compiled using ARM 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* RVCT 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @author 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Parthiban V 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par List of Functions: 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* - ihevc_sao_edge_offset_class2_chroma_a9q() 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* None 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@******************************************************************************* 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/ 380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/** 390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar@void ihevc_sao_edge_offset_class2_chroma(UWORD8 *pu1_src, 400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 src_strd, 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_left, 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top, 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top_left, 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_top_right, 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_src_bot_left, 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ UWORD8 *pu1_avail, 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD8 *pi1_sao_offset_u, 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD8 *pi1_sao_offset_v, 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 wd, 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@ WORD32 ht) 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************Variables Vs Registers***************************************** 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r0 => *pu1_src 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r1 => src_strd 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r2 => *pu1_src_left 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r3 => *pu1_src_top 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r4 => *pu1_src_top_left 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r5 => *pu1_avail 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r6 => *pi1_sao_offset_u 590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r9 => *pi1_sao_offset_v 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r7 => wd 610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r8=> ht 620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text 640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.p2align 2 650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gi1_table_edge_idx 670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_sao_edge_offset_class2_chroma_a9q 680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_1: 700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl1 - 8 710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_2: 730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl2 - 8 740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_3: 760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl3 - 8 770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_4: 790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl4 - 8 800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_5: 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl5 - 8 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_sao_edge_offset_class2_chroma_a9q: 850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STMFD sp!,{r4-r12,r14} @stack stores 
the values of the arguments 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x40] @Loads wd 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x44] @Loads ht 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r9,r7,#2 @wd - 2 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x28] @Loads pu1_src_top_left 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r10,[r3,r9] @pu1_src_top[wd - 2] 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r0,[sp,#0x2C] @Store pu1_src in sp 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r9,r7 @Move width to r9 for loop count 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r2,[sp,#0x30] @Store pu1_src_left in sp 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x34] @Loads pu1_avail 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r6,[sp,#0x38] @Loads pi1_sao_offset_u 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r3,[sp,#0x38] @Store pu1_src_top in sp 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB sp,sp,#0xD4 @Decrement the stack pointer to store some temp arr values 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r10,[sp] @u1_src_top_left_tmp = pu1_src_top[wd - 2] 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r8,#1 @ht-1 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r11,r10,r1,r0 @pu1_src[(ht - 1) * src_strd + col] 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD 
r12,sp,#10 @temp array 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_TOP_LOOP: 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D0,[r11]! @pu1_src[(ht - 1) * src_strd + col] 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r9,r9,#8 @Decrement the loop count by 8 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 D0,[r12]! @au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col] 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_TOP_LOOP 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_4_LOOP_U: 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r5,#4] @pu1_avail[4] 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r9,#0 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r0] @u1_pos_0_0_tmp_u = pu1_src[0] 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r0,#1] @u1_pos_0_0_tmp_v = pu1_src[1] 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_7_LOOP_U 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r4] @pu1_src_top_left[0] 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r14,r0,r1 @pu1_src + src_strd 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r12,r9,r11 @pu1_src[0] - pu1_src_top_left[0] 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r14,#2] @pu1_src[2 + src_strd] 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar MVNLT r12,#0 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r9,r14 @pu1_src[0] - pu1_src[2 + src_strd] 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r12,#1 @SIGN(pu1_src[0] - pu1_src_top_left[0]) 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_1 @table pointer 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl1: 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[0] - pu1_src[2 + src_strd]) 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r12,r11 @SIGN(pu1_src[0] - pu1_src_top_left[0]) + SIGN(pu1_src[0] - pu1_src[2 + src_strd]) 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 @0 != edge_idx 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_4_LOOP_V 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r6,r12] @pi1_sao_offset_u[edge_idx] 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r9,r11 @pu1_src[0] + pi1_sao_offset_u[edge_idx] 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r9,#8,r9 @u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) 
1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_4_LOOP_V: 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r4,#1] @pu1_src_top_left[1] 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r14,r0,r1 @pu1_src + src_strd 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r12,r10,r11 @pu1_src[1] - pu1_src_top_left[1] 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r14,#3] @pu1_src[3 + src_strd] 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r12,#0 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r10,r14 @pu1_src[1] - pu1_src[3 + src_strd] 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r12,#1 @SIGN(pu1_src[0] - pu1_src_top_left[0]) 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_2 @table pointer 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl2: 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[0] - pu1_src[3 + src_strd]) 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r12,r11 @SIGN(pu1_src[0] - pu1_src_top_left[0]) + SIGN(pu1_src[0] - pu1_src[3 + src_strd]) 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD 
r11,r11,#2 @edge_idx 1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 @0 != edge_idx 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_7_LOOP_U 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x110] @Loads pi1_sao_offset_v 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r11,r12] @pi1_sao_offset_v[edge_idx] 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r10,r10,r11 @pu1_src[0] + pi1_sao_offset_v[edge_idx] 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r10,#8,r10 @u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1) 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_7_LOOP_U: 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r10,[sp,#7] 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r9,[sp,#6] 1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r5,#7] @pu1_avail[7] 1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r7,#2 @wd - 2 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r8,#1 @ht - 1 1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r12,r11,r1,r10 @wd - 2 + (ht - 1) * src_strd 1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r12,r12,r0 @pu1_src[wd - 2 + (ht - 1) * src_strd] 1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r12] @u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd] 1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar LDRB r9,[r12,#1] @u1_pos_wd_ht_tmp_v = pu1_src[wd - 2 + (ht - 1) * src_strd] 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_3_LOOP 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r12,r1 @pu1_src[(wd - 2 + (ht - 1) * src_strd) - src_strd] 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r11,#2 @pu1_src[wd - 2 + (ht - 1) * src_strd - 2 - src_strd] 2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11] @Load pu1_src[wd - 2 + (ht - 1) * src_strd - 2 - src_strd] 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r10,r11 @pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd- 2 - src_strd] 2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd- 2 - src_strd]) 2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r14,r12,r1 @pu1_src[(wd - 2 + (ht - 1) * src_strd) + src_strd] 2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r14,r14,#2 @pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd] 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r14] @Load pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd] 2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r14,r10,r14 @pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd] 2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r14,#0 2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r14,#0 2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r14,#1 @SIGN(pu1_src[wd - 2 + 
(ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd]) 2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,r14 @Add 2 sign value 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_3 @table pointer 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl3: 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r14,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r14,#0 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_7_LOOP_V 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r6,r14] @pi1_sao_offset_u[edge_idx] 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r10,r10,r11 @pu1_src[wd - 2 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx] 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r10,#8,r10 @u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_7_LOOP_V: 2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r12,r12,#1 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r12,r1 @pu1_src[(wd - 1 + (ht - 1) * src_strd) - src_strd] 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r11,#2 @pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd] 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r11] @Load pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd] 
2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r9,r11 @pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd- 2 - src_strd] 2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd]) 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r14,r12,r1 @pu1_src[(wd - 1 + (ht - 1) * src_strd) + src_strd] 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r14,r14,#2 @pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd] 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r14,[r14] @Load pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd] 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r14,r9,r14 @pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd] 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r14,#0 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r14,#0 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r14,#1 @SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 1 + src_strd]) 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,r14 @Add 2 sign value 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#2 @edge_idx 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14, gi1_table_edge_idx_addr_4 @table pointer 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl4: 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r14,r14,pc 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r12,[r14,r11] @edge_idx = gi1_table_edge_idx[edge_idx] 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r12,#0 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ PU1_AVAIL_3_LOOP 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r14,[sp,#0x110] @Loads pi1_sao_offset_v 2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRSB r11,[r14,r12] @pi1_sao_offset_v[edge_idx] 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r9,r11 @pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx] 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar USAT r9,#8,r9 @u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1) 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_3_LOOP: 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r10,[sp,#8] 2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q0,#2 @const_2 = vdupq_n_s8(2) 2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRB r9,[sp,#9] 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r12,r8 @Move ht 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I16 Q1,#0 @const_min_clip = vdupq_n_s16(0) 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r14,r2 @Move pu1_src_left to pu1_src_left_cpy 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r5,#3] @pu1_avail[3] 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I16 Q2,#255 @const_max_clip = vdupq_n_u16((1 << bit_depth) - 1) 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 
2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r12,r12,#1 @ht_tmp-- 2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADDEQ r0,r0,r1 @pu1_src += src_strd 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D6,[r6] @offset_tbl_u = vld1_s8(pi1_sao_offset_u) 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r12,r12,#1 @ht_tmp-- 2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r6,[sp,#0x110] @Loads pi1_sao_offset_v 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADDEQ r14,r14,#2 @pu1_src_left_cpy += 2 2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r0,[sp,#2] @Store pu1_src in sp 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D7,[r6] @offset_tbl_v = vld1_s8(pi1_sao_offset_v) 2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r2, gi1_table_edge_idx_addr_5 @table pointer 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl5: 2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar add r2,r2,pc 2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r6,r7 @move wd to r6 loop_count 2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.S8 Q4,#0XFF @au1_mask = vdupq_n_s8(-1) 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#16 @Compare wd with 16 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT WIDTH_RESIDUE @If not jump to WIDTH_RESIDUE where loop is unrolled for 8 case 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#4 @Compare ht with 4 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLE WD_16_HT_4_LOOP @If jump to WD_16_HT_4_LOOP 2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_LOOP_16: 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @col == wd 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#16 @if(col == 16) 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D8[1],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SKIP_AU1_MASK_VAL 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#1] @pu1_avail[1] 3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D9[6],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D9[7],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
MahendrakarSKIP_AU1_MASK_VAL: 3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r5,#2] @pu1_avail[2] 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! @pu1_cur_row = vld1q_u8(pu1_src) 3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r9,#0 3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x118] @Loads ht 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r0,r1 @pu1_src - src_strd 3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,r3 @pu1_src_top_cpy 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,#2 @pu1_src - src_strd - 2 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r3,r3,#16 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r8]! 
@pu1_top_row = vld1q_u8(pu1_src - src_strd - 2) || vld1q_u8(pu1_src_top_cpy - 2) 3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd - 2) || vld1q_u8(pu1_src_top_cpy - 2) 3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 @(wd - col) 3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,#14 @15 + (wd - col) 3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x100] @Loads *pu1_src 3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)] 3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP: 3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r8,[r7] @load the value and increment by src_strd 3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r8,[r5],#2 @store it in the stack pointer 3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,r1 3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP 3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1 @I *pu1_src + src_strd 
3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! @I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#16 @I 3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r8] @I pu1_src_cpy[src_strd + 16] 3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r10,[sp,#0x108] @I Loads pu1_avail 3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D18[0],r5 @I pu1_next_row_tmp = vsetq_lane_u8(pu1_src_cpy[src_strd + 16], pu1_next_row_tmp, 0) 3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r10,#2] @I pu1_avail[2] 3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 @I 3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q8,Q9,#2 @I pu1_next_row_tmp = vextq_u8(pu1_next_row, pu1_next_row_tmp, 2) 3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE @I 3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r0] @I pu1_src_cpy[0] 3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r4,r12,r7 @I ht_tmp - row 
3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r0,#1] @I pu1_src_cpy[0] 3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL r4,r4,#1 @I (ht_tmp - row) * 2 3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r14,r4 @I pu1_src_left_cpy[(ht_tmp - row) * 2] 3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-2] @I load the value 3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r11,r5 @I pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2] 3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-1] @I load the value 3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 @I 3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r4,r10,r5 @I pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1] 3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 @I 3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @I SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r4,#0 @I 3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D14[0],r8 @I sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]), sign_up, 0) 3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r4,#0 @I 3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r4,#1 @I SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 
3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D14[1],r4 @I sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1]), sign_up, 1) 3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE: 4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D30,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q10,Q6,Q9 @I vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q11,Q6,Q9 @I vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q11,Q11,Q10 @I sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q0,Q7 @I edge_idx = vaddq_s8(const_2, sign_up) 4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q9,Q11 @I edge_idx = vaddq_s8(edge_idx, sign_down) 4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D18,{D30},D18 @I vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q11 @I sign_up = vnegq_s8(sign_down) 4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D19,{D30},D19 @I vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#14 @I sign_up = vextq_s8(sign_up, sign_up, 14) 4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D12 @I 
pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q11,Q9,Q4 @I edge_idx = vandq_s8(edge_idx, au1_mask) 4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q9,D13 @I pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D22,D23 @I 4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D22,{D6},D22 @I 4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D23,{D7},D23 @I 4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D22,D23 @I 4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 @I pu1_cur_row = pu1_next_row 4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D22 @I pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @I pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @I pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q9,Q9,D23 @I pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q9,Q9,Q1 @I pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q9,Q9,Q2 @I pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @I Decrement the ht_tmp loop count by 1 4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP: 4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1,LSL #1 @II *pu1_src + src_strd 4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 @I vmovn_s16(pi2_tmp_cur_row.val[0]) 4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r8,r1 @III *pu1_src + src_strd 4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! @II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D30,[r11]! 
@III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D31,[r11] @III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,#8 4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#16 @II 4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q9 @I vmovn_s16(pi2_tmp_cur_row.val[1]) 4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r8] @II pu1_src_cpy[src_strd + 16] 4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r11,r11,#16 @III 4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D28[0],r5 @II pu1_next_row_tmp = vsetq_lane_u8(pu1_src_cpy[src_strd + 16], pu1_next_row_tmp, 0) 4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r4,[r11] @III pu1_src_cpy[src_strd + 16] 4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,r1] @II pu1_src_cpy[0] 4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q14,Q8,Q14,#2 @II pu1_next_row_tmp = vextq_u8(pu1_next_row, pu1_next_row_tmp, 2) 4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @II ht_tmp - row 4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL r5,r5,#1 @II (ht_tmp - row) * 2 4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D18[0],r4 @III pu1_next_row_tmp = vsetq_lane_u8(pu1_src_cpy[src_strd + 16], pu1_next_row_tmp, 0) 4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r14,r5 @II pu1_src_left_cpy[(ht_tmp - row) * 2] 4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar LDRB r11,[r9,#-2] @II load the value 4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @I vst1q_u8(pu1_src_cpy, pu1_cur_row) 4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r11 @II pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2] 4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 @II 4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q15,Q9,#2 @III pu1_next_row_tmp = vextq_u8(pu1_next_row, pu1_next_row_tmp, 2) 4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r0,#1] @II pu1_src_cpy[0] 4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 @II 4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q11,Q6,Q14 @II vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @II SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-1] @II load the value 4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D14[0],r8 @II sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]), sign_up, 0) 4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @II Decrement the ht_tmp loop count by 1 4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r11,r11,r5 @II pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1] 4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q12,Q6,Q14 @II vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r11,#0 @II 
4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r11,#0 @II 4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q12,Q11 @II sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r11,#1 @II SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r4,[r0,r1] @III pu1_src_cpy[0] 4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D22,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @III ht_tmp - row 4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r10,r0,r1 4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D14[1],r11 @II sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1]), sign_up, 1) 4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL r5,r5,#1 @III (ht_tmp - row) * 2 4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r14,r5 @III pu1_src_left_cpy[(ht_tmp - row) * 2] 4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @II edge_idx = vaddq_s8(const_2, sign_up) 5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r10,[r10,#1] @III pu1_src_cpy[0] 5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-2] @III load the value 5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @II edge_idx = vaddq_s8(edge_idx, sign_down) 5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r4,r4,r5 @III 
pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2] 5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r4,#0 @III 5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r9,[r9,#-1] @III load the value 5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D22},D26 @II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q12 @II sign_up = vnegq_s8(sign_down) 5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r4,#0 @III 5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r10,r10,r9 @III pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1] 5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D22},D27 @II vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#14 @II sign_up = vextq_s8(sign_up, sign_up, 14) 5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r4,#1 @III SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @II edge_idx = vandq_s8(edge_idx, au1_mask) 5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r10,#0 @III 5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D26,D27 @II 5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d14[0],r4 @III sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]), sign_up, 0) 5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r10,#0 @III 
5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r10,#1 @III SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D6},D26 @II 5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q10,Q8,Q9 @III vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q11,Q8,Q9 @III vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 @II 5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q11,Q11,Q10 @III sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D14[1],r10 @III sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1]), sign_up, 1) 5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D24,D25 @II 5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q0,Q7 @III edge_idx = vaddq_s8(const_2, sign_up) 5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D20,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @II pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q9,Q9,Q11 @III edge_idx = vaddq_s8(edge_idx, sign_down) 
5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @II pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D18,{D20},D18 @III vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q11 @III sign_up = vnegq_s8(sign_down) 5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D19,{D20},D19 @III vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#14 @III sign_up = vextq_s8(sign_up, sign_up, 14) 5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q13,D13 @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q9,Q9,Q4 @III edge_idx = vandq_s8(edge_idx, au1_mask) 5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D18,D19 @III 5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D22,{D6},D18 @III 5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q13,Q13,D25 @II pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q15 @III pu1_cur_row = pu1_next_row 5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D23,{D7},D19 @III 
5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q13,Q13,Q1 @II pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D16 @III pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q13,Q13,Q2 @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D22,D23 @III 5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D28,Q14 @II vmovn_s16(pi2_tmp_cur_row.val[0]) 5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D29,Q13 @II vmovn_s16(pi2_tmp_cur_row.val[1]) 5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D22 @III pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q9,D17 @III pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 5720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @III pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 5730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @III pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 5750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q9,Q9,D23 @III pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], 
offset) 5760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#1 @III Decrement the ht_tmp loop count by 1 5780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q9,Q9,Q1 @III pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 5790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,#1 5800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q14},[r0],r1 @II vst1q_u8(pu1_src_cpy, pu1_cur_row) 5820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q9,Q9,Q2 @III pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 5830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT PU1_SRC_LOOP @If not equal jump to PU1_SRC_LOOP 5850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT INNER_LOOP_DONE 5860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1,LSL #1 @*pu1_src + src_strd 5880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 @III vmovn_s16(pi2_tmp_cur_row.val[0]) 5890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r0,r1] @pu1_src_cpy[0] 5910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 5920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 5930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 5940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r4,r12,r7 @ht_tmp - row 5950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#16 5970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q9 @III vmovn_s16(pi2_tmp_cur_row.val[1]) 5980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r8] @pu1_src_cpy[src_strd + 16] 5990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL r4,r4,#1 @(ht_tmp - row) * 2 6010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D18[0],r5 @pu1_next_row_tmp = vsetq_lane_u8(pu1_src_cpy[src_strd + 16], pu1_next_row_tmp, 0) 6020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r14,r4 @pu1_src_left_cpy[(ht_tmp - row) * 2] 6030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-2] @load the value 6050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q8,Q9,#2 @pu1_next_row_tmp = vextq_u8(pu1_next_row, pu1_next_row_tmp, 2) 6060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r11,r5 @pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2] 6070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 6090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @III vst1q_u8(pu1_src_cpy, pu1_cur_row) 6100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 6110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
6120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 6130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D30,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 6140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r11,[r0,#1] @pu1_src_cpy[0] 6160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D14[0],r8 @sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]), sign_up, 0) 6170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-1] @load the value 6180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r4,r11,r5 @pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1] 6200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q11,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 6210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r4,#0 6220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r4,#0 6240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q12,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 6250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r4,#1 @SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 6260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D14[1],r4 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1]), sign_up, 1) 6280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q12,Q11 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 6290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar VADD.I8 Q13,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 6310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @edge_idx = vaddq_s8(edge_idx, sign_down) 6320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D30},D26 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 6340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D30},D27 @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 6350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q10,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 6370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 6380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q9,D13 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 6400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D26,D27 6410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D6},D26 6430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 6440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D24,D25 6450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q10,Q10,D24 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 6470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q10,Q10,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 6480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q10,Q10,Q2 @pi2_tmp_cur_row.val[0] = 
vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 6490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q9,Q9,D25 @pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 6510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q9,Q9,Q1 @pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 6520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q9,Q9,Q2 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 6530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarINNER_LOOP_DONE: 6560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x118] @Loads ht 6570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D20,Q10 @vmovn_s16(pi2_tmp_cur_row.val[0]) 6580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 6590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x104] @Loads *pu1_src_left 6610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D21,Q9 @vmovn_s16(pi2_tmp_cur_row.val[1]) 6620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP: 6650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 6660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#2 6670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r11],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 6680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar BNE SRC_LEFT_LOOP 6690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r6,r6,#16 @Decrement the wd loop count by 16 6710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q10},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) 6720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#8 @Check whether residue remains 6730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT RE_ASSINING_LOOP @Jump to re-assigning loop 6750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 6760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r0,[sp,#0x02] @Loads *pu1_src 6770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 6780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r0,r0,r7 6790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT WIDTH_LOOP_16 @If not equal jump to width_loop 6800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BEQ WIDTH_RESIDUE @If residue remains jump to residue loop 6810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWD_16_HT_4_LOOP: 6840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 6850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 6860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @col == wd 6870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 6880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 6900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 
6910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D8[1],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 6920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,#16 @if(col == 16) 6940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SKIP_AU1_MASK_VAL_WD_16_HT_4 6950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#1] @pu1_avail[1] 6960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D9[6],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 6970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 D9[7],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 6980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSKIP_AU1_MASK_VAL_WD_16_HT_4: 7000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#2] @pu1_avail[2] 7010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 7020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r0,r1 @pu1_src - src_strd 7040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,r3 @pu1_src_top_cpy 7050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,#2 @pu1_src - src_strd - 2 7060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r8]! 
@pu1_top_row = vld1q_u8(pu1_src - src_strd - 2) || vld1q_u8(pu1_src_top_cpy - 2) 7070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd - 2) || vld1q_u8(pu1_src_top_cpy - 2) 7080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 7090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r3,r3,#16 7110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 7120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x118] @Loads ht 7130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 7140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,r6 @(wd - col) 7150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,#14 @15 + (wd - col) 7160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x100] @Loads *pu1_src 7170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + 15 + (wd - col)] 7180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP_WD_16_HT_4: 7200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r8,[r7] @load the value and increment by src_strd 7210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r8,[r5],#2 @store it in the stack pointer 7220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,r1 7230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 7250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP_WD_16_HT_4 7260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! 
@pu1_cur_row = vld1q_u8(pu1_src) 7280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 7290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 7300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 7320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 7330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 7340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 7350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 7360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP_WD_16_HT_4: 7380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 7390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1 @*pu1_src + src_strd 7400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 7410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 7420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 7430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#16 7450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r8] @pu1_src_cpy[src_strd + 16] 7460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D18[0],r5 @pu1_next_row_tmp = vsetq_lane_u8(pu1_src_cpy[src_strd + 16], pu1_next_row_tmp, 0) 7470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q8,Q9,#2 @pu1_next_row_tmp = vextq_u8(pu1_next_row, pu1_next_row_tmp, 2) 7480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,r12 7500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT SIGN_UP_CHANGE_WD_16_HT_4 7510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 7520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 7530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 7540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE_WD_16_HT_4 7550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_WD_16_HT_4: 7570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0] @pu1_src_cpy[0] 7580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @ht_tmp - row 7590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL r5,r5,#1 @(ht_tmp - row) * 2 7600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r14,r5 @pu1_src_left_cpy[(ht_tmp - row) * 2] 7610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB 
r5,[r9,#-2] @load the value 7620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2] 7630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 7640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 7650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 7660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d14[0],r8 @sign_up = sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]), sign_up, 0) 7670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#1] @pu1_src_cpy[0] 7690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-1] @load the value 7700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1] 7710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 7720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 7730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 7740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d14[1],r8 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1]), sign_up, 1) 7750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE_WD_16_HT_4: 7770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q11,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 7780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q12,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 7790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q12,Q11 @sign_down = 
vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 7800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 7820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @edge_idx = vaddq_s8(edge_idx, sign_down) 7830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D22,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 7850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D22},D26 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 7860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D22},D27 @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 7870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 7890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q12 @sign_up = vnegq_s8(sign_down) 7910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#14 @sign_up = vextq_s8(sign_up, sign_up, 14) 7920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D26,D27 7940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D6},D26 7950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 7960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D24,D25 7970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 7990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @pi2_tmp_cur_row.val[0] = 
vaddw_s8(pi2_tmp_cur_row.val[0], offset) 8000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 8010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 8020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q13,D13 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row))) 8040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q13,Q13,D25 @pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset) 8050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q13,Q13,Q1 @pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip) 8060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q13,Q13,Q2 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip)) 8070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D28,Q14 @vmovn_s16(pi2_tmp_cur_row.val[0]) 8090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D29,Q13 @vmovn_s16(pi2_tmp_cur_row.val[1]) 8100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {Q14},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) 8120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 @pu1_cur_row = pu1_next_row 8140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#1 @Decrement the ht_tmp loop count by 1 8150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE PU1_SRC_LOOP_WD_16_HT_4 @If not equal jump 
to PU1_SRC_LOOP_WD_16_HT_4 8160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x118] @Loads ht 8180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 8190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x104] @Loads *pu1_src_left 8200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP_WD_16_HT_4: 8220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 8230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r11],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 8240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#2 8260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP_WD_16_HT_4 8270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r6,r6,#16 @Decrement the wd loop count by 16 8300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLE RE_ASSINING_LOOP @Jump to re-assigning loop 8310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BGT WD_16_HT_4_LOOP 8320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_RESIDUE: 8350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 8360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 8370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r6,r7 @wd_residue == wd 8380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDREQB r8,[r5] @pu1_avail[0] 
8390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,#-1 8410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[0],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 8420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[1],r8 @au1_mask = vsetq_lane_s8(-1, au1_mask, 0) 8430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#1] @pu1_avail[1] 8450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[6],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 8460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d8[7],r8 @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15) 8470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r5,#2] @pu1_avail[2] 8490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 8500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBEQ r8,r0,r1 @pu1_src - src_strd 8520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVNE r8,r3 8530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,#2 @pu1_src - src_strd - 2 8540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D10,[r8]! 
@pu1_top_row = vld1q_u8(pu1_src - src_strd - 2) 8550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D11,[r8] @pu1_top_row = vld1q_u8(pu1_src - src_strd - 2) 8560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 8570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 8590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0x118] @Loads ht 8600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 8610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x100] @Loads *pu1_src 8620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r7,r7,#2 @(wd - 2) 8630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r8,r7 @pu1_src[0 * src_strd + (wd - 2)] 8640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP_RESIDUE: 8660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r8,[r7] @load the value and increment by src_strd 8670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r8,[r5],#2 @store it in the stack pointer 8680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r7,r7,r1 8690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r4,r4,#1 @decrement the loop count 8700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE AU1_SRC_LEFT_LOOP_RESIDUE 8710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D12,[r0]! 
@pu1_cur_row = vld1q_u8(pu1_src) 8730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D13,[r0] @pu1_cur_row = vld1q_u8(pu1_src) 8740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r0,#8 8750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q7,Q6,Q5 @vcgtq_u8(pu1_cur_row, pu1_top_row) 8770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q8,Q6,Q5 @vcltq_u8(pu1_cur_row, pu1_top_row) 8780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q7,Q8,Q7 @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt)) 8790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOV r7,r12 @row count, move ht_tmp to r7 8800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP_RESIDUE: 8820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.I8 Q9,#0 8830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r0,r1 @*pu1_src + src_strd 8840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D16,[r8]! 
@pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 8850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D17,[r8] @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 8860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,#8 8870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r8,r8,#16 8890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r5,[r8] @pu1_src_cpy[src_strd + 16] 8900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.16 D18[0],r5 @pu1_next_row_tmp = vsetq_lane_u8(pu1_src_cpy[src_strd + 16], pu1_next_row_tmp, 0) 8910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q9,Q8,Q9,#2 @pu1_next_row_tmp = vextq_u8(pu1_next_row, pu1_next_row_tmp, 2) 8920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r7,r12 8940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BLT SIGN_UP_CHANGE_RESIDUE 8950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r5,[sp,#0x108] @Loads pu1_avail 8960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r5,#2] @pu1_avail[2] 8970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r5,#0 8980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SIGN_UP_CHANGE_DONE_RESIDUE 8990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_RESIDUE: 9010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0] @pu1_src_cpy[0] 9020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r5,r12,r7 @ht_tmp - row 9030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LSL r5,r5,#1 @(ht_tmp - row) * 2 9040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r9,r14,r5 @pu1_src_left_cpy[(ht_tmp - 1 - row) * 2] 9050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-2] 
@load the value 9060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2] 9070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 9080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 9090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 9100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d14[0],r8 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[0] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]), sign_up, 0) 9110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r8,[r0,#1] @pu1_src_cpy[0] 9130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRB r5,[r9,#-1] @load the value 9140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,r5 @pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2] 9150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar CMP r8,#0 9160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MVNLT r8,#0 9170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MOVGT r8,#1 @SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2]) 9180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV.8 d14[1],r8 @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[1] - pu1_src_left_cpy[(ht_tmp - 1 - row) * 2 + 1]), sign_up, 1) 9190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE_RESIDUE: 9210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCGT.U8 Q11,Q6,Q9 @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp) 9220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VCLT.U8 Q12,Q6,Q9 @vcltq_u8(pu1_cur_row, pu1_next_row_tmp) 9230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VSUB.U8 Q12,Q12,Q11 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, 
cmp_gt)) 9240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q0,Q7 @edge_idx = vaddq_s8(const_2, sign_up) 9260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADD.I8 Q13,Q13,Q12 @edge_idx = vaddq_s8(edge_idx, sign_down) 9270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D22,[r2] @edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 9290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D26,{D22},D26 @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx)) 9300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D27,{D22},D27 @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx)) 9310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VAND Q13,Q13,Q4 @edge_idx = vandq_s8(edge_idx, au1_mask) 9330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VNEG.S8 Q7,Q12 @sign_up = vnegq_s8(sign_down) 9350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VEXT.8 Q7,Q7,Q7,#14 @sign_up = vextq_s8(sign_up, sign_up, 14) 9360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VUZP.8 D26,D27 9380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D24,{D6},D26 9390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VTBL.8 D25,{D7},D27 9400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VZIP.8 D24,D25 9410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVL.U8 Q14,D12 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row))) 9430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VADDW.S8 Q14,Q14,D24 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset) 
9440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMAX.S16 Q14,Q14,Q1 @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip) 9450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMIN.U16 Q14,Q14,Q2 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip)) 9460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOVN.I16 D28,Q14 @vmovn_s16(pi2_tmp_cur_row.val[0]) 9480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 {D28},[r0],r1 @vst1q_u8(pu1_src_cpy, pu1_cur_row) 9500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VMOV Q6,Q8 @pu1_cur_row = pu1_next_row 9520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#1 @Decrement the ht_tmp loop count by 1 9530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE PU1_SRC_LOOP_RESIDUE @If not equal jump to PU1_SRC_LOOP 9540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x118] @Loads ht 9560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r11,[sp,#0x104] @Loads *pu1_src_left 9570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r5,sp,#0x4B @*au1_src_left_tmp 9580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP_RESIDUE: 9600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[r5],#4 @au1_src_left_tmp[row] 9610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r8,r8,#2 9620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STR r7,[r11],#4 @pu1_src_left[row] = au1_src_left_tmp[row] 9630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
9640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_LEFT_LOOP_RESIDUE 9650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarRE_ASSINING_LOOP: 9680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r8,[sp,#0x118] @Loads ht 9690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r0,[sp,#0x100] @Loads *pu1_src 9710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUB r8,r8,#1 @ht - 1 9720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r7,[sp,#0x114] @Loads wd 9740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r9,[sp,#6] 9760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar MLA r6,r8,r1,r7 @wd - 2 + (ht - 1) * src_strd 9770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r9,[r0] @pu1_src_org[0] = u1_pos_0_0_tmp 9790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r6,r0,r6 @pu1_src[wd - 2 + (ht - 1) * src_strd] 9800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r9,[sp,#8] 9820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD r12,sp,#10 9830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r9,[r6,#-2] @pu1_src_org[wd - 1 + (ht - 1) * src_strd] = u1_pos_wd_ht_tmp_u 9840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r4,[sp,#0xFC] @Loads pu1_src_top_left 9860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDRH r10,[sp] @load u1_src_top_left_tmp from stack pointer 
9870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar STRH r10,[r4] @*pu1_src_top_left = u1_src_top_left_tmp 9880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDR r3,[sp,#0x10C] @Loads pu1_src_top 9890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_TOP_LOOP: 9910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VLD1.8 D0,[r12]! @pu1_src_top[col] = au1_src_top_tmp[col] 9920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar SUBS r7,r7,#8 @Decrement the width 9930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar VST1.8 D0,[r3]! @pu1_src_top[col] = au1_src_top_tmp[col] 9940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar BNE SRC_TOP_LOOP 9950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarEND_LOOPS: 9970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ADD sp,sp,#0xD4 9980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar LDMFD sp!,{r4-r12,r15} @Reload the registers from SP 9990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1002