10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/*****************************************************************************
20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Licensed under the Apache License, Version 2.0 (the "License");
60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* you may not use this file except in compliance with the License.
70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* You may obtain a copy of the License at:
80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* http://www.apache.org/licenses/LICENSE-2.0
100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Unless required by applicable law or agreed to in writing, software
120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* distributed under the License is distributed on an "AS IS" BASIS,
130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* See the License for the specific language governing permissions and
150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* limitations under the License.
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*****************************************************************************/
180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@/**
190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @file
210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  ihevc_sao_edge_offset_class3_chroma.s
220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @brief
240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  Contains the function definition for SAO edge offset class 3 for chroma.
250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* Functions are coded using NEON instructions and can be compiled using ARM
260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* RVCT
270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @author
290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  Parthiban V
300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @par List of Functions:
320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@* @remarks
350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*  None
360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*
370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*******************************************************************************
380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@*/
390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src,
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              WORD32 src_strd,
410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              UWORD8 *pu1_src_left,
420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              UWORD8 *pu1_src_top,
430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              UWORD8 *pu1_src_top_left,
440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              UWORD8 *pu1_src_top_right,
450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              UWORD8 *pu1_src_bot_left,
460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              UWORD8 *pu1_avail,
470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              WORD8 *pi1_sao_offset_u,
480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              WORD8 *pi1_sao_offset_v,
490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              WORD32 wd,
500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@                              WORD32 ht)
510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@**************Variables Vs Registers*****************************************
520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r0 =>  *pu1_src
530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r1 =>  src_strd
540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r2 =>  *pu1_src_left
550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r3 =>  *pu1_src_top
560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r4 =>  *pu1_src_top_left
570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r5 =>  *pu1_avail
580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r6 =>  *pi1_sao_offset_u
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r9 =>  *pi1_sao_offset_v
600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r7 =>  wd
610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@r8=>   ht
620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.text                                  @ everything below (offset words and the function) is emitted into the code section
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.p2align 2                             @ align to 2^2 = 4 bytes
650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.extern gi1_table_edge_idx             @ byte lookup table indexed by edge_idx; declared external to this file
670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.globl ihevc_sao_edge_offset_class3_chroma_a9q @ export the function entry point
680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_1:             @ word read by "LDR r14, gi1_table_edge_idx_addr_1" just before ulbl1
700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl1 - 8   @ table address minus the pc value read at ulbl1 (ulbl1 + 8, assuming ARM state), so "add r14,r14,pc" yields &gi1_table_edge_idx
710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_2:             @ word read by the LDR just before ulbl2
730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl2 - 8   @ same pc-relative scheme, resolved by "add r14,r14,pc" at ulbl2
740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_3:             @ word read by the LDR just before ulbl3
760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl3 - 8   @ same pc-relative scheme, resolved by "add r14,r14,pc" at ulbl3
770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_4:             @ word read by the LDR just before ulbl4
790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl4 - 8   @ same pc-relative scheme, resolved by "add r14,r14,pc" at ulbl4
800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakargi1_table_edge_idx_addr_5:             @ word read by "LDR r2, gi1_table_edge_idx_addr_5" just before ulbl5
820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar.long gi1_table_edge_idx - ulbl5 - 8   @ same pc-relative scheme, resolved by "add r2,r2,pc" at ulbl5 (table pointer ends up in r2)
830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarihevc_sao_edge_offset_class3_chroma_a9q:
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STMFD       sp!,{r4-r12,r14}            @stack stores the values of the arguments
880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x40]               @Loads wd
900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r8,[sp,#0x44]               @Loads ht
910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r9,r7,#2                    @wd - 2
920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r4,[sp,#0x28]               @Loads pu1_src_top_left
940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r10,[r3,r9]                 @pu1_src_top[wd - 2]
950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOV         r9,r7                       @Move width to r9 for loop count
970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x34]               @Loads pu1_avail
990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r6,[sp,#0x38]               @Loads pi1_sao_offset_u
1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STR         r3,[sp,#0x38]               @Store pu1_src_top in sp
1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         sp,sp,#0xD4                 @Decrement the stack pointer to store some temp arr values
1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRH        r10,[sp]                    @u1_src_top_left_tmp = pu1_src_top[wd - 2]
1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,r8,#1                   @ht-1
1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MLA         r11,r10,r1,r0               @pu1_src[(ht - 1) * src_strd + col]
1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r12,sp,#10                  @temp array
1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_TOP_LOOP:
1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D0,[r11]!                   @pu1_src[(ht - 1) * src_strd + col]
1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r9,r9,#8                    @Decrement the loop count by 8
1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VST1.8      D0,[r12]!                   @au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col]
1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         AU1_SRC_TOP_LOOP
1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_5_LOOP_U:
1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r9,[r5,#5]                  @pu1_avail[5]
1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r9,#0
1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r14,r7,#2                   @[wd - 2]
1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r9,[r0,r14]                 @u1_pos_0_0_tmp_u = pu1_src[wd - 2]
1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r7,#1                   @[wd - 1]
1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r0,r11]                @u1_pos_0_0_tmp_v = pu1_src[wd - 1]
1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         PU1_AVAIL_6_LOOP_U
1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r11,[sp,#0x100]             @Load pu1_src_top_right from sp
1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r11]                   @pu1_src_top_right[0]
1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r12,r9,r11                  @pu1_src[wd - 2] - pu1_src_top_right[0]
1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r12,#0
1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r12,#0
1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r12,#1                      @SIGN(pu1_src[wd - 2] - pu1_src_top_right[0])
1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r0,r1                   @pu1_src + src_strd
1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r14,r14,#2                  @[wd - 2 - 2]
1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r14,[r11,r14]               @pu1_src[wd - 2 - 2 + src_strd]
1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r9,r14                  @pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd]
1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r11,#0
1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r11,#0
1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r11,#1                      @SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd])
1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r12,r11                 @SIGN(pu1_src[wd - 2] - pu1_src_top_right[0]) +  SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd])
1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r11,#2                  @edge_idx
1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r14, gi1_table_edge_idx_addr_1 @table pointer
1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl1:
1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r14,r14,pc
1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRSB       r12,[r14,r11]               @edge_idx = gi1_table_edge_idx[edge_idx]
1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r12,#0                      @0 != edge_idx
1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         PU1_AVAIL_5_LOOP_V
1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRSB       r11,[r6,r12]                @pi1_sao_offset_u[edge_idx]
1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r9,r9,r11                   @pu1_src[wd - 2] + pi1_sao_offset_u[edge_idx]
1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    USAT        r9,#8,r9                    @u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_5_LOOP_V:
1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r11,[sp,#0x100]             @Load pu1_src_top_right from sp
1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r11,#1]                @pu1_src_top_right[1]
1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r12,r10,r11                 @pu1_src[wd - 1] - pu1_src_top_right[1]
1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r12,#0
1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r12,#0
1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r12,#1                      @SIGN(pu1_src[wd - 1] - pu1_src_top_right[1])
1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r0,r1                   @pu1_src + src_strd
1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r14,r7,#3                   @[wd - 1 - 2]
1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r14,[r11,r14]               @pu1_src[wd - 1 - 2 + src_strd]
1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r10,r14                 @pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd]
1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r11,#0
1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r11,#0
1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r11,#1                      @SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd])
1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r12,r11                 @SIGN(pu1_src[wd - 1] - pu1_src_top_right[1]) +  SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd])
1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r11,#2                  @edge_idx
1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r14, gi1_table_edge_idx_addr_2 @table pointer
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl2:
1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r14,r14,pc
1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRSB       r12,[r14,r11]               @edge_idx = gi1_table_edge_idx[edge_idx]
1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r12,#0                      @0 != edge_idx
1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         PU1_AVAIL_6_LOOP_U
1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r11,[sp,#0x110]             @Loads pi1_sao_offset_v
1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRSB       r11,[r11,r12]               @pi1_sao_offset_v[edge_idx]
1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r10,r10,r11                 @pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx]
1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    USAT        r10,#8,r10                  @u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_6_LOOP_U:
1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRB        r9,[sp,#6]
1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRB        r10,[sp,#7]
1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STR         r0,[sp,#0x100]              @Store pu1_src in sp
1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r5,#6]                 @pu1_avail[6]
1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r10,#0
1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r8,#1                   @ht - 1
1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MLA         r12,r11,r1,r0               @pu1_src[(ht - 1) * src_strd]
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r12]                   @u1_pos_wd_ht_tmp_u = pu1_src[(ht - 1) * src_strd]
1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r9,[r12,#1]                 @u1_pos_wd_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1]
1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         PU1_AVAIL_3_LOOP
1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r12,r1                  @pu1_src[(ht - 1) * src_strd - src_strd]
1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r11,#2                  @pu1_src[(ht - 1) * src_strd +  2 - src_strd]
1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r11]                   @Load pu1_src[(ht - 1) * src_strd +  2 - src_strd]
1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r10,r11                 @pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd +  2 - src_strd]
1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r11,#0
1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r11,#0
1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r11,#1                      @SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd +  2 - src_strd])
1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r14,[sp,#0x104]             @Load pu1_src_bot_left from sp
2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r14,[r14]                   @Load pu1_src_bot_left[0]
2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r14,r10,r14                 @pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]
2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r14,#0
2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r14,#0
2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r14,#1                      @SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0])
2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r11,r14                 @Add 2 sign value
2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r11,#2                  @edge_idx
2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r14, gi1_table_edge_idx_addr_3 @table pointer
2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl3:
2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r14,r14,pc
2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRSB       r14,[r14,r11]               @edge_idx = gi1_table_edge_idx[edge_idx]
2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r14,#0
2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         PU1_AVAIL_6_LOOP_V
2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRSB       r11,[r6,r14]                @pi1_sao_offset_u[edge_idx]
2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r10,r10,r11                 @pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx]
2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    USAT        r10,#8,r10                  @u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_6_LOOP_V:
2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r12,r12,#1                  @pu1_src[(ht - 1) * src_strd + 1]
2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r12,r1                  @pu1_src[(ht - 1) * src_strd + 1) - src_strd]
2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r11,#2                  @pu1_src[(ht - 1) * src_strd + 2 - src_strd]
2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r11]                   @Load pu1_src[(ht - 1) * src_strd + 2 - src_strd]
2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r9,r11                  @pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd]
2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r11,#0
2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r11,#0
2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r11,#1                      @SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd])
2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r14,[sp,#0x104]             @Load pu1_src_bot_left from sp
2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r14,[r14,#1]                @Load pu1_src_bot_left[1]
2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r14,r9,r14                  @pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1]
2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r14,#0
2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r14,#0
2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r14,#1                      @SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1])
2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r11,r14                 @Add 2 sign value
2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r11,#2                  @edge_idx
2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r14, gi1_table_edge_idx_addr_4 @table pointer
2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl4:
2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r14,r14,pc
2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRSB       r12,[r14,r11]               @edge_idx = gi1_table_edge_idx[edge_idx]
2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r12,#0
2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         PU1_AVAIL_3_LOOP
2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r14,[sp,#0x110]             @Loads pi1_sao_offset_v
2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRSB       r11,[r14,r12]               @pi1_sao_offset_v[edge_idx]
2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r9,r9,r11                   @pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx]
2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    USAT        r9,#8,r9                    @u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_3_LOOP:
2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRB        r10,[sp,#8]
2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRB        r9,[sp,#9]
2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STR         r2,[sp,#0x104]              @Store pu1_src_left in sp
2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOV         r12,r8                      @Move ht
2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOV         r14,r2                      @Move pu1_src_left to pu1_src_left_cpy
2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r5,#3]                 @pu1_avail[3]
2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r11,#0
2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         PU1_AVAIL_2_LOOP
2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r12,r12,#1                  @ht_tmp--
2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_2_LOOP:
2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r5,#2]                  @pu1_avail[2]
2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r5,#0
2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         PU1_AVAIL_2_LOOP_END
2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r0,r0,r1                    @pu1_src += src_strd
2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r12,r12,#1                  @ht_tmp--
2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r14,r14,#2                  @pu1_src_left_cpy += 2
2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_AVAIL_2_LOOP_END:
2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STR         r0,[sp,#2]                  @Store pu1_src in sp
2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.I8     Q0,#2                       @const_2 = vdupq_n_s8(2)
2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.I16    Q1,#0                       @const_min_clip = vdupq_n_s16(0)
2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.I16    Q2,#255                     @const_max_clip = vdupq_n_u16((1 << bit_depth) - 1)
2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D6,[r6]                     @offset_tbl_u = vld1_s8(pi1_sao_offset_u)
2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r6,[sp,#0x110]              @Loads pi1_sao_offset_v
2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D7,[r6]                     @offset_tbl_v = vld1_s8(pi1_sao_offset_v)
2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r2, gi1_table_edge_idx_addr_5 @table pointer
2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarulbl5:
2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    add         r2,r2,pc
2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    @VLD1.8     D6,[r6]                     @edge_idx_tbl = vld1_s8(gi1_table_edge_idx)
2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.S8     Q4,#0xFF                    @au1_mask = vdupq_n_s8(-1)
2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOV         r6,r7                       @move wd to r6 loop_count
2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r7,#16                      @Compare wd with 16
2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BLT         WIDTH_RESIDUE               @If not jump to WIDTH_RESIDUE where loop is unrolled for 8 case
2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r8,#4                       @Compare ht with 4
2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BLE         WD_16_HT_4_LOOP             @If jump to WD_16_HT_4_LOOP
2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_LOOP_16:                              @Set up au1_mask (Q4) lanes 0,1 and 14,15 from pu1_avail for this pass
2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x114]              @Loads wd
2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r6,r7                       @col == wd
2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @Loads pu1_avail
2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDREQB      r8,[r5]                     @if(col == wd) r8 = pu1_avail[0]
2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVNE       r8,#-1                      @else r8 = -1 (0xFF keeps the lane when ANDed with edge_idx)
2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D8[0],r8                    @au1_mask = vsetq_lane_s8(r8, au1_mask, 0)
3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r5,#2]                 @pu1_avail[2]
3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r6,#16                      @if(col == 16); flags are used by the BNE below
3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D8[1],r8                    @au1_mask = vsetq_lane_s8(r8, au1_mask, 1)
3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SKIP_AU1_MASK_VAL           @col != 16: lanes 14 and 15 of au1_mask are left unchanged
3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r5,#1]                  @pu1_avail[1]
3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D9[6],r8                    @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 14)
3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D9[7],r8                    @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15)
3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSKIP_AU1_MASK_VAL:                          @Load current and top rows, compute initial sign_up, point r7 at the bytes saved by AU1_SRC_LEFT_LOOP
3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r11,#0                      @pu1_avail[2] == 0 ? (flags are used by SUBEQ/MOVNE below)
3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D12,[r0]!                   @pu1_cur_row = vld1q_u8(pu1_src), low 8 bytes
3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D13,[r0]                    @pu1_cur_row = vld1q_u8(pu1_src), high 8 bytes
3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r0,#8                       @undo the post-increment of r0
3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r5,sp,#0x4B                 @*au1_src_left_tmp
3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBEQ       r8,r0,r1                    @pu1_avail[2] == 0: r8 = pu1_src - src_strd
3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.I8     Q9,#0                       @clear Q9 (pu1_next_row_tmp)
3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVNE       r8,r3                       @pu1_avail[2] != 0: r8 = r3 (presumably the saved top-row pointer; set up outside this section - confirm)
3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r8,#2                    @top row pointer + 2 (pu1_src - src_strd + 2 in the EQ case)
3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D10,[r8]!                   @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2), low 8 bytes
3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D11,[r8]                    @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2), high 8 bytes
3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,#8                       @undo the post-increment of r8
3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r3,r3,#16                   @r3 += 16 (the 16 bytes loaded per pass)
3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r4,[sp,#0x118]              @Loads ht
3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q7,Q6,Q5                    @vcgtq_u8(pu1_cur_row, pu1_top_row)
3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x114]              @Loads wd
3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r7,r7,r6                    @(wd - col)
3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q8,Q6,Q5                    @vcltq_u8(pu1_cur_row, pu1_top_row)
3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r7,r7,#14                   @14 + (wd - col)
3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r8,[sp,#0x100]              @Loads *pu1_src
3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q7,Q8,Q7                    @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r7,r8,r7                    @&pu1_src[0 * src_strd + 14 + (wd - col)]
3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP:                          @Copy 2 bytes per row for ht rows (r4) from r7 (stride src_strd) into au1_src_left_tmp (r5)
3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r8,[r7]                     @load the 2 bytes at the current row
3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r4,r4,#1                    @decrement the loop count (sets the flags for the BNE below)
3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRH        r8,[r5],#2                  @store into au1_src_left_tmp and advance r5 by 2
3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r7,r7,r1                    @step the source pointer to the next row (+= src_strd)
3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         AU1_SRC_LEFT_LOOP           @repeat until the count reaches 0
3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOV         r7,r12                      @row count, move ht_tmp to r7
3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.I8     Q9,#0                       @I clear Q9 (pu1_next_row_tmp)
3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r0,r1                   @I *pu1_src + src_strd
3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r5,r12,r7                   @I ht_tmp - row (0 here: r7 was just set to ht_tmp)
3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D16,[r11]!                  @I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D17,[r11]                   @I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,#8                      @I undo the post-increment of r11
3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r14,r5,LSL #1            @I &pu1_src_left_cpy[(ht_tmp - row) * 2]
3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r5,[r8,#2]                  @I 2 bytes at pu1_src_left_cpy[(ht_tmp - row) * 2 + 2]
3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.16     D19[3],r5                   @I place them in bytes 14,15 of Q9 (pu1_next_row_tmp)
3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r11,[sp,#0x108]             @I Loads pu1_avail
3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r11,#2]                @I pu1_avail[2]
3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q9,Q9,Q8,#14                @I pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14): the 2 left bytes followed by pu1_next_row[0..13]
3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r11,#0                      @I
3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SIGN_UP_CHANGE_DONE         @I pu1_avail[2] != 0: keep sign_up lanes 14,15 as computed by the vector compare
3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r0,#14]                 @I pu1_src_cpy[14]
3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r5,r0,r1                    @I pu1_src_cpy - src_strd
3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r5,#16]                @I load the value pu1_src_cpy[16 - src_strd]
3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r9,[r0,#15]                 @I pu1_src_cpy[15]
3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,r8,r11                   @I pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]
3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r5,#17]                @I load the value pu1_src_cpy[17 - src_strd]
3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r8,#0                       @I flags are used by MVNLT/MOVGT below
3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r8,#0                       @I difference < 0: r8 = -1
3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r9,r9,r10                   @I pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd]
3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r8,#1                       @I SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd])
3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r9,#0                       @I flags are used by MVNLT/MOVGT below
3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r9,#0                       @I difference < 0: r9 = -1
3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[6],r8                   @I sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 14)
3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r9,#1                       @I SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd])
3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[7],r9                   @I sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 15)
3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE:                        @Row I: edge_idx = const_2 + sign_up + sign_down, table lookups, add offset and clip (narrow/store happen in PU1_SRC_LOOP)
3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D28,[r2]                    @edge_idx_tbl = vld1_s8(gi1_table_edge_idx)
3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q10,Q6,Q9                   @I vcgtq_u8(pu1_cur_row, pu1_next_row_tmp)
3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q11,Q6,Q9                   @I vcltq_u8(pu1_cur_row, pu1_next_row_tmp)
3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q11,Q11,Q10                 @I sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q9,Q0,Q7                    @I edge_idx = vaddq_s8(const_2, sign_up)
3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q9,Q9,Q11                   @I edge_idx = vaddq_s8(edge_idx, sign_down)
4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D18,{D28},D18               @I vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VNEG.S8     Q7,Q11                      @I sign_up = vnegq_s8(sign_down)
4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D19,{D28},D19               @I vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx))
4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q7,Q7,Q7,#2                 @I sign_up = vextq_s8(sign_up, sign_up, 2)
4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q10,D12                     @I pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row)))
4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VAND        Q9,Q9,Q4                    @I edge_idx = vandq_s8(edge_idx, au1_mask)
4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VUZP.8      D18,D19                     @I de-interleave edge_idx: even bytes -> D18, odd bytes -> D19
4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D22,{D6},D18                @I offsets for the even bytes, looked up in the table held in D6
4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D23,{D7},D19                @I offsets for the odd bytes, looked up in the table held in D7
4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VZIP.8      D22,D23                     @I re-interleave the offsets into Q11 (D22,D23)
4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q9,D13                      @I pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row)))
4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q10,Q10,D22                 @I pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset)
4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q10,Q10,Q1                  @I pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip)
4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q10,Q10,Q2                  @I pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip))
4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV        Q6,Q8                       @I pu1_cur_row = pu1_next_row
4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q9,Q9,D23                   @I pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset)
4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r7,r7,#1                    @I Decrement the ht_tmp loop count by 1
4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q9,Q9,Q1                    @I pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip)
4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q9,Q9,Q2                    @I pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip))
4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP:                               @Loop body: narrow and store the pending row (tag I), interleaved with the work for rows tagged II and III
4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r0,r1,LSL #1            @II r0 + 2 * src_strd (r0 still points at row I until the VST1 below)
4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D20,Q10                     @I vmovn_s16(pi2_tmp_cur_row.val[0])
4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r5,r12,r7                   @II ht_tmp - row
4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r4,r0,r1                    @II *pu1_src + src_strd (base for the II byte loads at offsets 14 and 15 below)
4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D21,Q9                      @I vmovn_s16(pi2_tmp_cur_row.val[1])
4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r14,r5,LSL #1            @II &pu1_src_left_cpy[(ht_tmp - row) * 2]
4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r9,[r8,#2]                  @II 2 bytes at pu1_src_left_cpy[(ht_tmp - row) * 2 + 2]
4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D16,[r11]!                  @II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D17,[r11]                   @II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,#8                      @II undo the post-increment of r11
4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r4,#14]                @II pu1_src_cpy[14]
4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r4,#15]                 @II pu1_src_cpy[15]
4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.16     D29[3],r9                   @II place the 2 left bytes in bytes 14,15 of Q14 (pu1_next_row_tmp for II)
4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r4,r11,r1                   @III *pu1_src + src_strd
4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r0,#17]                 @II load the value pu1_src_cpy[17 - src_strd]
4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D30,[r4]!                   @III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D31,[r4]                    @III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r4,#8                       @III undo the post-increment of r4
4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r0,#16]                @II load the value pu1_src_cpy[16 - src_strd]
4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r7,r7,#1                    @II Decrement the ht_tmp loop count by 1
4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VST1.8      {Q10},[r0],r1               @I vst1q_u8(pu1_src_cpy, pu1_cur_row); r0 += src_strd
4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,r10,r11                 @II pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]
4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r10,#0                      @II flags are used by MVNLT/MOVGT below
4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q14,Q14,Q8,#14              @II pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14)
4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,r8,r5                    @II pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd]
4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r10,#0                      @II difference < 0: r10 = -1
4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D21,[r2]                    @edge_idx_tbl = vld1_s8(gi1_table_edge_idx)
4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r10,#1                      @II SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd])
4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r8,#0                       @II flags are used by MVNLT/MOVGT below
4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[6],r10                  @II sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 14)
4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r8,#0                       @II difference < 0: r8 = -1
4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r8,#1                       @II SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd])
4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,r12,r7                  @III ht_tmp - row
4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[7],r8                   @II sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 15)
4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r14,r10,LSL #1          @III &pu1_src_left_cpy[(ht_tmp - row) * 2]
4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r7,#1                       @III
4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q11,Q6,Q14                  @II vcgtq_u8(pu1_cur_row, pu1_next_row_tmp)
4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         NEXT_ROW_POINTER_ASSIGNED_2 @III r7 != 1: keep r11 pointing into pu1_src_left_cpy
4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @III Loads pu1_avail
4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r5,#3]                  @III pu1_avail[3]
4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r5,#0                       @III
4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBNE       r11,r4,#4                   @III pu1_avail[3] != 0: r11 = r4 - 4, so the following LDRH [r11,#2] reads the 2 bytes at r4 - 2
4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_2:                @Fetch the 2 left bytes for row III, finish and store row II, compute row III up to the clip
4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r5,[r11,#2]                 @III 2 bytes destined for bytes 14,15 of Q9
4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q12,Q6,Q14                  @II vcltq_u8(pu1_cur_row, pu1_next_row_tmp)
4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r0,r1                   @III r0 + src_strd (base for the III byte loads at offsets 14 and 15)
4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r9,[r11,#14]                @III pu1_src_cpy[14]
4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.16     D19[3],r5                   @III place the 2 left bytes in bytes 14,15 of Q9 (pu1_next_row_tmp for III)
4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r11,#15]                @III pu1_src_cpy[15]
4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r0,#16]                @III load the value pu1_src_cpy[16 - src_strd]
4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q12,Q12,Q11                 @II sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r0,#17]                @III load the value pu1_src_cpy[17 - src_strd]
4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r9,r9,r11                   @III pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]
4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q9,Q9,Q15,#14               @III pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14)
4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,r8,r10                  @III pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd]
5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r9,#0                       @III flags are used by MVNLT/MOVGT below
5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q13,Q0,Q7                   @II edge_idx = vaddq_s8(const_2, sign_up)
5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r9,#0                       @III difference < 0: r9 = -1
5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r9,#1                       @III SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd])
5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q13,Q13,Q12                 @II edge_idx = vaddq_s8(edge_idx, sign_down)
5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r10,#0                      @III flags are used by MVNLT/MOVGT below
5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VNEG.S8     Q7,Q12                      @II sign_up = vnegq_s8(sign_down)
5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D26,{D21},D26               @II vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r10,#0                      @III difference < 0: r10 = -1
5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r10,#1                      @III SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd])
5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q7,Q7,Q7,#2                 @II sign_up = vextq_s8(sign_up, sign_up, 2)
5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D27,{D21},D27               @II vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx))
5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q11,Q8,Q9                   @III vcgtq_u8(pu1_cur_row, pu1_next_row_tmp)
5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[6],r9                   @III sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] -pu1_src_cpy[16 - src_strd]), sign_up, 14)
5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VAND        Q13,Q13,Q4                  @II edge_idx = vandq_s8(edge_idx, au1_mask)
5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[7],r10                  @III sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] -pu1_src_cpy[17 - src_strd]), sign_up, 15)
5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VUZP.8      D26,D27                     @II de-interleave edge_idx: even bytes -> D26, odd bytes -> D27
5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q10,Q8,Q9                   @III vcltq_u8(pu1_cur_row, pu1_next_row_tmp)
5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D24,{D6},D26                @II offsets for the even bytes, looked up in the table held in D6
5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q11,Q10,Q11                 @III sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q9,Q0,Q7                    @III edge_idx = vaddq_s8(const_2, sign_up)
5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D25,{D7},D27                @II offsets for the odd bytes, looked up in the table held in D7
5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q9,Q9,Q11                   @III edge_idx = vaddq_s8(edge_idx, sign_down)
5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D20,[r2]                    @edge_idx_tbl = vld1_s8(gi1_table_edge_idx)
5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VZIP.8      D24,D25                     @II re-interleave the offsets into Q12 (D24,D25)
5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q14,D12                     @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row)))
5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D18,{D20},D18               @III vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VNEG.S8     Q7,Q11                      @III sign_up = vnegq_s8(sign_down)
5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q14,Q14,D24                 @II pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset)
5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D19,{D20},D19               @III vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx))
5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q7,Q7,Q7,#2                 @III sign_up = vextq_s8(sign_up, sign_up, 2)
5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q13,D13                     @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row)))
5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VAND        Q9,Q9,Q4                    @III edge_idx = vandq_s8(edge_idx, au1_mask)
5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q10,D16                     @III pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row)))
5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VUZP.8      D18,D19                     @III de-interleave edge_idx: even bytes -> D18, odd bytes -> D19
5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q14,Q14,Q1                  @II pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip)
5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D22,{D6},D18                @III offsets for the even bytes, looked up in the table held in D6
5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q14,Q14,Q2                  @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip))
5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q13,Q13,D25                 @II pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset)
5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D23,{D7},D19                @III offsets for the odd bytes, looked up in the table held in D7
5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q13,Q13,Q1                  @II pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip)
5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q9,D17                      @III pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row)))
5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VZIP.8      D22,D23                     @III re-interleave the offsets into Q11 (D22,D23)
5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D28,Q14                     @II vmovn_s16(pi2_tmp_cur_row.val[0])
5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q10,Q10,D22                 @III pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset)
5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV        Q6,Q15                      @III pu1_cur_row = pu1_next_row
5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q13,Q13,Q2                  @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip))
5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r7,r7,#1                    @III Decrement the ht_tmp loop count by 1
5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q10,Q10,Q1                  @III pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip)
5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r7,#1                       @III sets the flags used by the BGT/BLT at the end of the loop
5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D29,Q13                     @II vmovn_s16(pi2_tmp_cur_row.val[1])
5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q10,Q10,Q2                  @III pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip))
5720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q9,Q9,D23                   @III pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset)
5740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q9,Q9,Q1                    @III pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip)
5760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VST1.8      {Q14},[r0],r1               @II vst1q_u8(pu1_src_cpy, pu1_cur_row); r0 += src_strd
5780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q9,Q9,Q2                    @III pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip))
5790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BGT         PU1_SRC_LOOP                @r7 > 1: jump back to PU1_SRC_LOOP
5810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BLT         INNER_LOOP_DONE             @r7 < 1: jump to INNER_LOOP_DONE; r7 == 1 falls through
5820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@Last row of the 16-wide path: reached by fall-through when r7 == 1 after the decrement. Stores the pending III result, then filters the row in Q6, leaving 16-bit results in Q10/Q9 for INNER_LOOP_DONE.
5840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r11,r0,r1,LSL #1            @r11 = r0 + 2 * src_strd (r1 shifted left by one)
5850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D20,Q10                     @III vmovn_s16(pi2_tmp_cur_row.val[0])
5860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r5,r12,r7                   @ht_tmp - row
5870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r14,r5,LSL #1            @pu1_src_left_cpy[(ht_tmp - row) * 2]
5890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D21,Q9                      @III vmovn_s16(pi2_tmp_cur_row.val[1])
5900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r7,#1                       @flags feed the BNE below; nothing in between carries an S suffix
5910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r4,[r0,#16]                 @load the value pu1_src_cpy[16 - src_strd]
5930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D16,[r11]!                  @pu1_next_row (low 8 bytes) = vld1q_u8(pu1_src_cpy + src_strd)
5940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D17,[r11]                   @pu1_next_row (high 8 bytes)
5950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,#8                      @undo the 8-byte post-increment of the D16 load
5960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r9,[r0,#17]                 @load the value pu1_src_cpy[17 - src_strd]
5970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
5980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         NEXT_ROW_POINTER_ASSIGNED_3 @skip the pu1_avail[3] check unless r7 == 1
5990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @Loads pu1_avail
6000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r5,#3]                  @pu1_avail[3]
6010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r5,#0
6020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBNE       r8,r11,#4                   @pu1_avail[3] != 0: r8 = r11 - 4, so the LDRH [r8,#2] below reads at r11 - 2 (pu1_src[src_strd - 2])
6030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_3:
6050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r5,[r8,#2]                  @16-bit value inserted into D19[3] below
6060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VST1.8      {Q10},[r0],r1               @III vst1q_u8(pu1_src_cpy, pu1_cur_row)
6070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r0,#14]                 @pu1_src_cpy[14]
6080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,r8,r4                    @pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]
6100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.16     D19[3],r5                   @top halfword lane of Q9 = r5
6110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r0,#15]                @pu1_src_cpy[15]
6120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r8,#0                       @flags for the MVNLT/MOVGT pair on r8
6140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q9,Q9,Q8,#14                @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14): bytes 14-15 of Q9 followed by bytes 0-13 of Q8
6150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,r10,r9                  @pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd]
6160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r8,#0                       @r8 = -1 if the difference is negative
6180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D28,[r2]                    @edge_idx_tbl = vld1_s8(gi1_table_edge_idx)
6190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r8,#1                       @SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd])
6200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r10,#0                      @flags for the MVNLT/MOVGT pair on r10
6220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[6],r8                   @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]), sign_up, 14)
6230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r10,#0                      @r10 = -1 if the difference is negative
6240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r10,#1                      @SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd])
6260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[7],r10                  @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd]), sign_up, 15)
6270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q10,Q6,Q9                   @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp)
6280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q11,Q6,Q9                   @vcltq_u8(pu1_cur_row, pu1_next_row_tmp)
6300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q11,Q11,Q10                 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
6310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q9,Q0,Q7                    @edge_idx = vaddq_s8(const_2, sign_up)
6330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q9,Q9,Q11                   @edge_idx = vaddq_s8(edge_idx, sign_down)
6340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D18,{D28},D18               @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
6350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D19,{D28},D19               @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx))
6360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VAND        Q9,Q9,Q4                    @edge_idx = vandq_s8(edge_idx, au1_mask)
6380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q10,D12                     @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row)))
6400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VUZP.8      D18,D19                     @de-interleave: even-indexed bytes to D18, odd-indexed bytes to D19
6410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D22,{D6},D18                @offsets for the even bytes, looked up in D6
6430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D23,{D7},D19                @offsets for the odd bytes, looked up in D7
6440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q9,D13                      @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row)))
6460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VZIP.8      D22,D23                     @re-interleave the two offset vectors
6470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q10,Q10,D22                 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset)
6490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q10,Q10,Q1                  @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip)
6500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q10,Q10,Q2                  @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip))
6510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q9,Q9,D23                   @pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset)
6530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q9,Q9,Q1                    @pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip)
6540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q9,Q9,Q2                    @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip))
6550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@Common exit of the 16-wide row loop: narrow and store the last filtered row, copy 2 * ht bytes from au1_src_left_tmp back to pu1_src_left, then choose the next column step.
6570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarINNER_LOOP_DONE:
6580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r8,[sp,#0x118]              @Loads ht
6600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D20,Q10                     @vmovn_s16(pi2_tmp_cur_row.val[0]) for the row stored below
6610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r5,sp,#0x4B                 @*au1_src_left_tmp; NOTE(review): 0x4B is not a multiple of 4, so the word loads below are unaligned if sp is word-aligned - confirm
6620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LSL         r8,r8,#1                    @r8 = 2 * ht, the number of bytes to copy
6640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D21,Q9                      @vmovn_s16(pi2_tmp_cur_row.val[1]) for the row stored below
6650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r11,[sp,#0x104]             @Loads *pu1_src_left
6660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP:
6680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[r5],#4                  @load 4 bytes of au1_src_left_tmp
6690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r8,r8,#4                    @4 bytes per pass; the loop ends only when r8 reaches exactly 0
6700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STR         r7,[r11],#4                 @pu1_src_left[row] = au1_src_left_tmp[row]
6710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SRC_LEFT_LOOP
6720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r6,r6,#16                   @Decrement the wd loop count by 16
6740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VST1.8      {Q10},[r0],r1               @vst1q_u8(pu1_src_cpy, pu1_cur_row)
6750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r6,#8                       @Check whether residue remains (replaces the flags of the SUBS above)
6760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BLT         RE_ASSINING_LOOP            @col < 8: jump to re-assigning loop
6780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x114]              @Loads wd
6790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r0,[sp,#0x02]               @Loads *pu1_src (slot written outside this section; NOTE(review): offset 2 is not word-aligned - confirm)
6800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r7,r7,r6                    @wd - col
6810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r0,r0,r7                    @pu1_src + (wd - col)
6820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BGT         WIDTH_LOOP_16               @col > 8 (flags still from CMP r6,#8): another 16-wide column
6830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         WIDTH_RESIDUE               @col == 8: jump to the residue loop
6840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@Column loop of the WD_16_HT_4 path: sets lanes 0/1 and (only when col == 16) lanes 14/15 of au1_mask (Q4) from pu1_avail before each 16-byte column.
6850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWD_16_HT_4_LOOP:
6860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x114]              @Loads wd
6870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @Loads pu1_avail
6890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r6,r7                       @col == wd
6900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDREQB      r8,[r5]                     @col == wd: r8 = pu1_avail[0]
6920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVNE       r8,#-1                      @otherwise r8 = -1
6930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D8[0],r8                    @au1_mask = vsetq_lane_s8(r8, au1_mask, 0)
6940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r6,#16                      @if(col == 16)
6960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D8[1],r8                    @au1_mask = vsetq_lane_s8(r8, au1_mask, 1)
6970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
6980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SKIP_AU1_MASK_VAL_WD_16_HT_4 @col != 16: leave mask lanes 14/15 untouched
6990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r5,#1]                  @pu1_avail[1]
7000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D9[6],r8                    @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 14)
7010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D9[7],r8                    @au1_mask = vsetq_lane_s8(pu1_avail[1], au1_mask, 15)
7020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@Loads pu1_cur_row (Q6) and pu1_top_row (Q5), derives sign_up (Q7), and computes r7 = pu1_src + 14 + (wd - col) for the left-column save loop that follows.
7030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSKIP_AU1_MASK_VAL_WD_16_HT_4:
7040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r5,#2]                 @pu1_avail[2]
7050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,r0,r1                    @pu1_src - src_strd; unconditional because the flags here still come from CMP r6,#16, not from pu1_avail[2] (an EQ condition left r8 stale whenever col != 16)
7060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r11,#0                      @pu1_avail[2] == 0 ?
7080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVNE       r8,r3                       @pu1_avail[2] != 0: take the top row from r3 instead (presumably pu1_src_top - confirm)
7090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D12,[r0]!                   @pu1_cur_row = vld1q_u8(pu1_src)
7100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D13,[r0]                    @pu1_cur_row = vld1q_u8(pu1_src)
7110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r0,#8                       @undo the 8-byte post-increment of the D12 load
7120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r8,#2                    @pu1_src - src_strd + 2
7130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r3,r3,#16                   @advance r3 by one 16-byte column
7150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D10,[r8]!                   @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2)
7160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D11,[r8]                    @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2)
7170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,#8                       @undo the 8-byte post-increment of the D10 load
7180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r5,sp,#0x4B                 @*au1_src_left_tmp
7190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r4,[sp,#0x118]              @Loads ht
7210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q7,Q6,Q5                    @vcgtq_u8(pu1_cur_row, pu1_top_row)
7220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x114]              @Loads wd
7230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r7,r7,r6                    @(wd - col)
7250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q8,Q6,Q5                    @vcltq_u8(pu1_cur_row, pu1_top_row)
7260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r7,r7,#14                   @14 + (wd - col)
7270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r8,[sp,#0x100]              @Loads *pu1_src
7290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q7,Q8,Q7                    @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
7300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r7,r8,r7                    @pu1_src[0 * src_strd + 14 + (wd - col)]
7310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@Saves one 16-bit value per row (r4 = ht rows, stepping r7 by r1) into au1_src_left_tmp at r5, then clears Q9 and loads the row counter.
7320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP_WD_16_HT_4:
7330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r8,[r7]                     @load the 16-bit value at r7
7340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r4,r4,#1                    @decrement the loop count
7350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRH        r8,[r5],#2                  @store it to au1_src_left_tmp (stack), advancing r5 by 2
7370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r7,r7,r1                    @step r7 by src_strd to the next row
7380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         AU1_SRC_LEFT_LOOP_WD_16_HT_4 @flags from the SUBS above; STRH/ADD do not change them
7390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.I8     Q9,#0                       @clear Q9 (pu1_next_row_tmp)
7410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOV         r7,r12                      @row count, move ht_tmp to r7
7420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@Row loop of the WD_16_HT_4 path: r7 counts rows down from r12 (ht_tmp); each pass filters the 16-byte row held in Q6 against the next row, stores it at r0 and advances r0 by r1.
7430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP_WD_16_HT_4:
7440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r9,r0,r1                    @*pu1_src + src_strd
7450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @Loads pu1_avail
7470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D16,[r9]!                   @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
7480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D17,[r9]                    @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
7490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r9,#8                       @undo the 8-byte post-increment of the D16 load
7500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r5,#3]                  @pu1_avail[3]
7510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r12,r7                  @ht_tmp - row
7530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r14,r11,LSL #1           @pu1_src_left_cpy[(ht_tmp - row) * 2]
7540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r8,#2                    @pu1_src_left_cpy[(ht_tmp - row + 1) * 2]
7550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r5,#0
7570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         NEXT_ROW_POINTER_ASSIGNED_WD_16_HT_4 @pu1_avail[3] == 0: keep the pu1_src_left_cpy pointer
7580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r7,#1
7590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBEQ       r8,r9,#2                    @last row (r7 == 1): pu1_src[src_strd - 2]
7600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_WD_16_HT_4:
7620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r5,[r8]                     @16-bit value inserted into D19[3] below
7630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.16     D19[3],r5                   @top halfword lane of Q9 = r5
7640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q9,Q9,Q8,#14                @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14)
7650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r7,r12
7670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BLT         SIGN_UP_CHANGE_WD_16_HT_4   @not the first row (r7 < ht_tmp): always refresh sign_up lanes 14/15
7680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @Loads pu1_avail
7690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r5,#2]                  @pu1_avail[2]
7700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r5,#0
7710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SIGN_UP_CHANGE_DONE_WD_16_HT_4 @first row with pu1_avail[2] != 0: keep sign_up as it is
7720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_WD_16_HT_4:
7740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r0,#14]                 @pu1_src_cpy[14]
7750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r9,r0,r1                    @pu1_src_cpy - src_strd
7760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r9,#16]                 @load the value pu1_src_cpy[16 - src_strd]
7780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r0,#15]                @pu1_src_cpy[15]
7800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,r8,r5                    @pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]
7810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r9,#17]                @load the value pu1_src_cpy[17 - src_strd]
7830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r8,#0                       @flags for the MVNLT/MOVGT pair on r8
7840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r8,#0                       @r8 = -1 if the difference is negative
7860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,r10,r11                 @pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd]
7870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r8,#1                       @SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd])
7890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r10,#0                      @flags for the MVNLT/MOVGT pair on r10
7910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[6],r8                   @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]), sign_up, 14)
7920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r10,#0                      @r10 = -1 if the difference is negative
7930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r10,#1                      @SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd])
7950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[7],r10                  @sign_up = vsetq_lane_s8(SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd]), sign_up, 15)
7960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
7970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE_WD_16_HT_4:
7980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D20,[r2]                    @edge_idx_tbl = vld1_s8(gi1_table_edge_idx)
7990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q11,Q6,Q9                   @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp)
8000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q12,Q6,Q9                   @vcltq_u8(pu1_cur_row, pu1_next_row_tmp)
8020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q12,Q12,Q11                 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
8030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q13,Q0,Q7                   @edge_idx = vaddq_s8(const_2, sign_up)
8050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q13,Q13,Q12                 @edge_idx = vaddq_s8(edge_idx, sign_down)
8060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VNEG.S8     Q7,Q12                      @sign_up = vnegq_s8(sign_down)
8080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D26,{D20},D26               @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
8090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D27,{D20},D27               @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx))
8110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q7,Q7,Q7,#2                 @sign_up = vextq_s8(sign_up, sign_up, 2); lanes 14/15 are rewritten at SIGN_UP_CHANGE_WD_16_HT_4 on the next row
8120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q14,D12                     @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row)))
8140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VAND        Q13,Q13,Q4                  @edge_idx = vandq_s8(edge_idx, au1_mask)
8150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VUZP.8      D26,D27                     @de-interleave: even-indexed bytes to D26, odd-indexed bytes to D27
8180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D24,{D6},D26                @offsets for the even bytes, looked up in D6
8190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D25,{D7},D27                @offsets for the odd bytes, looked up in D7
8200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VZIP.8      D24,D25                     @re-interleave the two offset vectors
8210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q15,D13                     @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_cur_row)))
8230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q14,Q14,D24                 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset)
8240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q14,Q14,Q1                  @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip)
8260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q14,Q14,Q2                  @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip))
8270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV        Q6,Q8                       @pu1_cur_row = pu1_next_row
8290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q15,Q15,D25                 @pi2_tmp_cur_row.val[1] = vaddw_s8(pi2_tmp_cur_row.val[1], offset)
8300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q15,Q15,Q1                  @pi2_tmp_cur_row.val[1] = vmaxq_s16(pi2_tmp_cur_row.val[1], const_min_clip)
8320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q15,Q15,Q2                  @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[1]), const_max_clip))
8330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D28,Q14                     @vmovn_s16(pi2_tmp_cur_row.val[0])
8350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D29,Q15                     @vmovn_s16(pi2_tmp_cur_row.val[1])
8360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r7,r7,#1                    @Decrement the ht_tmp loop count by 1
8380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VST1.8      {Q14},[r0],r1               @vst1q_u8(pu1_src_cpy, pu1_cur_row)
8390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         PU1_SRC_LOOP_WD_16_HT_4     @If not equal jump to PU1_SRC_LOOP_WD_16_HT_4
8400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar@Copies au1_src_left_tmp back to pu1_src_left, then steps to the next 16 columns or leaves through RE_ASSINING_LOOP.
8410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r8,[sp,#0x118]              @Loads ht
8420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r5,sp,#0x4B                 @*au1_src_left_tmp
8430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r11,[sp,#0x104]             @Loads *pu1_src_left
8440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP_WD_16_HT_4:
8460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[r5],#4                  @load 4 bytes of au1_src_left_tmp
8470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r8,r8,#2                    @ht counts down by 2 per 4-byte copy (2 * ht bytes in total); the loop ends only when r8 reaches exactly 0
8480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STR         r7,[r11],#4                 @pu1_src_left[row] = au1_src_left_tmp[row]
8490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SRC_LEFT_LOOP_WD_16_HT_4
8500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r6,r6,#16                   @Decrement the wd loop count by 16
8520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BLE         RE_ASSINING_LOOP            @col <= 0: jump to re-assigning loop
8530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BGT         WD_16_HT_4_LOOP             @col > 0: next column. NOTE(review): r0 is not rewound here, unlike the 16-wide path which reloads pu1_src and adds (wd - col) before looping - confirm
8540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ WIDTH_RESIDUE: set-up for the row loop that stores 8 bytes per row.
@ Steps performed below:
@   1. Build the edge mask in d8: lanes 0/1 take pu1_avail[0] when r6 == wd
@      and -1 otherwise; lanes 6/7 take pu1_avail[1].
@   2. Pick the row used as "top" (r10) and load 16 bytes from it into Q5.
@   3. Save one halfword per row, read at byte offset (wd - 2) of each row,
@      into the scratch buffer at sp+0x4B.
@   4. Load the current row into Q6 and compute the initial sign_up in Q7.
8550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarWIDTH_RESIDUE:
8560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x114]              @Loads wd
8570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @Loads pu1_avail
8590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r6,r7                       @wd_residue == wd
8600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDREQB      r8,[r5]                     @pu1_avail[0]
8620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ r8 = -1 when the residue is not the whole width (r6 != wd)
8630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVNE       r8,#-1
8640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r5,#1]                 @pu1_avail[1]
8650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r9,[r5,#2]                  @pu1_avail[2]
8670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      d8[0],r8                    @au1_mask lane 0 = r8 (pu1_avail[0] or -1)
8680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r9,#0
8690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ r10 = r0 - r1 when pu1_avail[2] == 0, otherwise r3.
@ NOTE(review): r3 is set outside this view; presumably the top-row pointer
@ for the current column - confirm.
8700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBEQ       r10,r0,r1                   @pu1_src - src_strd
8710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      d8[1],r8                    @au1_mask lane 1 = r8 (pu1_avail[0] or -1)
8720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVNE       r10,r3
8730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r10,r10,#2                  @selected top row pointer + 2
8750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      d8[6],r11                   @au1_mask lane 6 = pu1_avail[1]
8760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r5,sp,#0x4B                 @*au1_src_left_tmp
8770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r4,[sp,#0x118]              @Loads ht
8790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      d8[7],r11                   @au1_mask lane 7 = pu1_avail[1]
8800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x114]              @Loads wd
8810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r8,[sp,#0x100]              @Loads *pu1_src
@ Q5 (D10:D11) is filled with two 8-byte loads; the SUB undoes the
@ post-increment of the first load so r10 is left unchanged.
8830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D10,[r10]!                  @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2)
8840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D11,[r10]                   @pu1_top_row = vld1q_u8(pu1_src - src_strd + 2)
8850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,#8
8860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r7,r7,#2                    @(wd - 2)
8870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
8880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r7,r8,r7                    @pu1_src[0 * src_strd + (wd - 2)]
8890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ For each of the ht rows: copy the halfword at byte offset (wd - 2) of the
@ row into the scratch buffer, advancing the source by r1 (src_strd).
8900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarAU1_SRC_LEFT_LOOP_RESIDUE:
8910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r8,[r7]                     @load the halfword of this row
8920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r7,r7,r1                    @step to the next row (src_strd)
8930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRH        r8,[r5],#2                  @store it in the scratch buffer, post-increment
8940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r4,r4,#1                    @decrement the loop count
8950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         AU1_SRC_LEFT_LOOP_RESIDUE
8960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ Q6 (D12:D13) = 16 bytes of the current row; r0 is restored after the
@ post-increment load.
8970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D12,[r0]!                   @pu1_cur_row = vld1q_u8(pu1_src)
8980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D13,[r0]                    @pu1_cur_row = vld1q_u8(pu1_src)
8990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r0,#8
9000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ Q9 is cleared here and later receives the shifted next row.
@ Q7 = cmp_lt - cmp_gt per byte: +1 where cur > top, -1 where cur < top,
@ 0 where equal (the compare results are all-ones / all-zeros masks).
9010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.I8     Q9,#0
9020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q7,Q6,Q5                    @vcgtq_u8(pu1_cur_row, pu1_top_row)
9030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q8,Q6,Q5                    @vcltq_u8(pu1_cur_row, pu1_top_row)
9050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q7,Q8,Q7                    @sign_up = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
9060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOV         r7,r12                      @row count, move ht_tmp to r7
9070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ PU1_SRC_LOOP_RESIDUE: one iteration per row, r7 counting down from r12.
@ Per row:
@   - load the next row (Q8) and build Q9 = next row shifted up by two bytes,
@     with a two-byte pair from the left-column buffer (or from the row
@     itself on the last row) placed in lanes 0/1;
@   - optionally patch lanes 14/15 of sign_up (Q7);
@   - edge_idx = Q0 + sign_up + sign_down, mapped through the table at r2 and
@     masked with Q4;
@   - look up offsets separately for even and odd bytes (D6 / D7), add them
@     to the low 8 bytes of the current row, clamp with Q1 / Q2, and store
@     8 bytes at r0, then advance r0 by r1.
@ NOTE(review): Q0, Q1, Q2, D6, D7, r2, r12 and r14 are set outside this
@ view; their roles below are taken from the existing comments - confirm.
9080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarPU1_SRC_LOOP_RESIDUE:
9090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r9,r0,r1                    @*pu1_src + src_strd
9100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r11,r12,r7                  @ht_tmp - row
9120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D16,[r9]!                   @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
9130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D17,[r9]                    @pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd)
9140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r9,#8
9150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @Loads pu1_avail
9160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r5,#3]                  @pu1_avail[3]
9180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r14,r11,LSL #1           @pu1_src_left_cpy[(ht_tmp - row) * 2]
9190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r5,#0
9210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r8,r8,#2                    @pu1_src_left_cpy[(ht_tmp - row + 1) * 2]
9220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ When pu1_avail[3] != 0 and this is the last row (r7 == 1), the pair is
@ read from the next row itself at r9 - 2 instead of the left-column buffer.
9230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BEQ         NEXT_ROW_POINTER_ASSIGNED_RESIDUE
9240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r7,#1
9250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBEQ       r8,r9,#2                    @pu1_src[src_strd - 2]
9260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarNEXT_ROW_POINTER_ASSIGNED_RESIDUE:
9280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r8]
9290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r8,#1]
9310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D19[6],r5                   @lane 14 of pu1_next_row_tmp = first byte of the pair
9320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r7,r12
9330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ The NEON instructions between this CMP and the BLT below leave the
@ condition flags untouched.
@ After VEXT #14: Q9 = { old Q9 lanes 14,15, Q8 lanes 0..13 }.
9340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D19[7],r8                   @lane 15 of pu1_next_row_tmp = second byte of the pair
9350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q9,Q9,Q8,#14                @pu1_next_row_tmp = vextq_u8(pu1_next_row_tmp, pu1_next_row, 14)
9360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ Rows after the first (r7 < r12) always patch sign_up. The first row
@ patches it only when pu1_avail[2] == 0.
9370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BLT         SIGN_UP_CHANGE_RESIDUE
9380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r5,[sp,#0x108]              @Loads pu1_avail
9390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r5,#2]                  @pu1_avail[2]
9400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r5,#0
9410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SIGN_UP_CHANGE_DONE_RESIDUE
9420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ Scalar SIGN() of two byte differences, written into lanes 14 and 15 of
@ sign_up (Q7 = D14:D15). MVNLT yields -1 and MOVGT yields +1; a zero
@ difference is left as 0. The plain SUB between each CMP and its
@ conditional moves does not alter the flags.
9430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_RESIDUE:
9440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r8,[r0,#14]                 @pu1_src_cpy[14]
9450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r9,r0,r1
9460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r5,[r9,#16]                 @load the value pu1_src_cpy[16 - src_strd]
9480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r10,[r0,#15]                @pu1_src_cpy[15]
9500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,r8,r5                    @pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd]
9510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRB        r11,[r9,#17]                @load the value pu1_src_cpy[17 - src_strd]
9530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r8,#0
9540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r8,#0
9560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,r10,r11                 @pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd]
9570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r8,#1                       @SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd])
9590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    CMP         r10,#0
9610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[6],r8                   @sign_up lane 14 = SIGN(pu1_src_cpy[14] - pu1_src_cpy[16 - src_strd])
9620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MVNLT       r10,#0
9630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MOVGT       r10,#1                      @SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd])
9650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV.8      D15[7],r10                  @sign_up lane 15 = SIGN(pu1_src_cpy[15] - pu1_src_cpy[17 - src_strd])
9660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSIGN_UP_CHANGE_DONE_RESIDUE:
9680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D20,[r2]                    @edge_idx_tbl = vld1_s8(gi1_table_edge_idx)
9690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCGT.U8     Q11,Q6,Q9                   @vcgtq_u8(pu1_cur_row, pu1_next_row_tmp)
9700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VCLT.U8     Q12,Q6,Q9                   @vcltq_u8(pu1_cur_row, pu1_next_row_tmp)
9720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VSUB.U8     Q12,Q12,Q11                 @sign_down = vreinterpretq_s8_u8(vsubq_u8(cmp_lt, cmp_gt))
9730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q13,Q0,Q7                   @edge_idx = vaddq_s8(const_2, sign_up)
9750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADD.I8     Q13,Q13,Q12                 @edge_idx = vaddq_s8(edge_idx, sign_down)
9760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ sign_up for the next row is -sign_down rotated by two bytes: the VEXT
@ below uses #2, so new lane i takes old lane i+2 (lanes 14/15 wrap to 0/1).
9770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VNEG.S8     Q7,Q12                      @sign_up = vnegq_s8(sign_down)
9780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D26,{D20},D26               @vtbl1_s8(edge_idx_tbl, vget_low_s8(edge_idx))
9790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D27,{D20},D27               @vtbl1_s8(edge_idx_tbl, vget_high_s8(edge_idx))
9810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VEXT.8      Q7,Q7,Q7,#2                 @sign_up = vextq_s8(sign_up, sign_up, 2)
9820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVL.U8    Q14,D12                     @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_cur_row)))
9840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VAND        Q13,Q13,Q4                  @edge_idx = vandq_s8(edge_idx, au1_mask)
9850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ De-interleave the indices (even bytes -> D26, odd bytes -> D27), look each
@ half up in its own table (D6 for even bytes, D7 for odd bytes), then
@ re-interleave the resulting offsets into D24:D25.
@ NOTE(review): D6 / D7 are presumably the U and V offset tables - confirm.
9870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VUZP.8      D26,D27
9880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D24,{D6},D26
9890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VTBL.8      D25,{D7},D27
9900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VZIP.8      D24,D25
9910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ Q14 already holds the widened low half of the old current row, so Q6 can
@ be overwritten with the next row before the add.
9920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOV        Q6,Q8                       @pu1_cur_row = pu1_next_row
9930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VADDW.S8    Q14,Q14,D24                 @pi2_tmp_cur_row.val[0] = vaddw_s8(pi2_tmp_cur_row.val[0], offset)
9940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMAX.S16    Q14,Q14,Q1                  @pi2_tmp_cur_row.val[0] = vmaxq_s16(pi2_tmp_cur_row.val[0], const_min_clip)
9960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMIN.U16    Q14,Q14,Q2                  @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vminq_u16(vreinterpretq_u16_s16(pi2_tmp_cur_row.val[0]), const_max_clip))
9970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
9980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r7,r7,#1                    @Decrement the ht_tmp loop count by 1
9990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VMOVN.I16   D30,Q14                     @vmovn_s16(pi2_tmp_cur_row.val[0])
10000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VST1.8      {D30},[r0],r1               @store 8 result bytes at pu1_src_cpy, then advance by src_strd
10020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         PU1_SRC_LOOP_RESIDUE        @If not equal jump to PU1_SRC_LOOP_RESIDUE
10040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ Left-column write-back after the residue row loop.
@ Copies the scratch bytes starting at sp+0x4B into the buffer whose pointer
@ is read from [sp,#0x104], one 32-bit word per iteration, while the count
@ read from [sp,#0x118] is stepped down by 2 per word. Execution then falls
@ through into RE_ASSINING_LOOP.
@ NOTE(review): the loop exits only when the count reaches exactly 0 (BNE),
@ so ht is presumably always even - confirm against the callers.
@ NOTE(review): sp+0x4B is an odd offset, so these word accesses are
@ presumably unaligned - confirm the target permits unaligned LDR/STR.
10050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r8,[sp,#0x118]              @Loads ht
10060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r5,sp,#0x4B                 @*au1_src_left_tmp
10070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r11,[sp,#0x104]             @Loads *pu1_src_left
10090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_LEFT_LOOP_RESIDUE:
10110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[r5],#4                  @load 4 bytes of au1_src_left_tmp, post-increment
10120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r8,r8,#2                    @count -= 2 for every word copied
10130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STR         r7,[r11],#4                 @store the same 4 bytes to pu1_src_left, post-increment
10140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SRC_LEFT_LOOP_RESIDUE
10150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ RE_ASSINING_LOOP: final write-backs from the stack scratch area, then the
@ function epilogue.
@   - halfword at [sp,#6]  -> byte offset (wd - 2) of the first row
@   - halfword at [sp,#8]  -> start of row (ht - 1)
@   - halfword at [sp]     -> *pu1_src_top_left
@   - wd bytes from sp+10  -> pu1_src_top (SRC_TOP_LOOP, 8 bytes at a time)
@ NOTE(review): the three halfwords were stored before this view; they are
@ presumably values saved (or pre-computed) for those corner positions -
@ confirm against the code that fills [sp], [sp,#6] and [sp,#8].
10170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarRE_ASSINING_LOOP:
10180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r7,[sp,#0x114]              @Loads wd
10190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r8,[sp,#0x118]              @Loads ht
10200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r0,[sp,#0x100]              @Loads *pu1_src
10220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r10,r7,#2                   @wd - 2
10230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ r9 = halfword kept at [sp,#6]
10240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r9,[sp,#6]
10250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUB         r8,r8,#1                    @ht - 1
10260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRH        r9,[r0,r10]                 @halfword at pu1_src[wd - 2] (first row) = value from [sp,#6]
10280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    MLA         r6,r8,r1,r0                 @pu1_src[(ht - 1) * src_strd]
10290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r4,[sp,#0xFC]               @Loads pu1_src_top_left
10310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ r9 = halfword kept at [sp,#8]; r12 = sp+10, the source for SRC_TOP_LOOP
10320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r9,[sp,#8]
10330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         r12,sp,#10
10340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRH        r9,[r6]                     @halfword at pu1_src[(ht - 1) * src_strd] = value from [sp,#8]
10360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDRH        r10,[sp]                    @load u1_src_top_left_tmp from stack pointer
10380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    STRH        r10,[r4]                    @*pu1_src_top_left = u1_src_top_left_tmp
10390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDR         r3,[sp,#0x10C]              @Loads pu1_src_top
10400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ Copies 8 bytes per iteration from the scratch area at sp+10 to pu1_src_top.
@ NOTE(review): the loop exits only when r7 reaches exactly 0 (BNE), so wd is
@ presumably a non-zero multiple of 8 - confirm against the callers.
10410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarSRC_TOP_LOOP:
10420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VLD1.8      D0,[r12]!                   @D0 = next 8 bytes of au1_src_top_tmp
10430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    SUBS        r7,r7,#8                    @Decrement the width
10440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    VST1.8      D0,[r3]!                    @pu1_src_top[col] = au1_src_top_tmp[col]
10450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    BNE         SRC_TOP_LOOP
10460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
@ Epilogue: move sp up by 0xD4 bytes (presumably the local scratch area
@ reserved by the prologue), then pop r4-r12 and load pc to return.
@ NOTE(review): only core registers are restored here, yet this routine
@ writes d8 and d10-d15 (for example in WIDTH_RESIDUE), which the AAPCS lists
@ as callee-saved - confirm they are preserved elsewhere or that this is
@ acceptable for every caller.
10470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish MahendrakarEND_LOOPS:
10480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    ADD         sp,sp,#0xD4
10490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    LDMFD       sp!,{r4-r12,r15}            @Reload the registers from SP
10500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
10520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1053