///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
///*******************************************************************************
//* //file
//*  ihevcd_fmt_conv_420sp_to_420p.s
//*
//* //brief
//*  Contains the AArch64 NEON routine that converts a YUV 4:2:0 semi-planar
//*  image (interleaved chroma) to YUV 4:2:0 planar.
//*
//* //author
//*  ittiam
//*
//* //par list of functions:
//*  - ihevcd_fmt_conv_420sp_to_420p_av8()
//*
//* //remarks
//*  none
//*
//*******************************************************************************/

.text

.include "ihevc_neon_macros.s"

///*****************************************************************************
//*
//*  Function Name    : ihevcd_fmt_conv_420sp_to_420p_av8()
//*
//*  Description      : Converts a YUV 4:2:0 semi-planar image (a Y plane plus
//*                     one plane of interleaved chroma bytes) into YUV 4:2:0
//*                     planar form (separate Y, U and V planes).
//*                     - The Y plane is copied row by row unless
//*                       disable_luma_copy is non-zero.
//*                     - The chroma plane is de-interleaved: even bytes of each
//*                       source row go to one destination plane, odd bytes to
//*                       the other. is_u_first == 0 swaps the two destination
//*                       pointers before the copy.
//*
//*  Arguments        : x0           pu1_src_y
//*                     x1           pu1_src_uv
//*                     x2           pu1_dest_y
//*                     x3           pu1_dest_u
//*                     x4           pu1_dest_v
//*                     w5           u2_width
//*                     w6           u2_height
//*                     w7           u2_stridey
//*                     [sp, #80]    u2_strideuv
//*                     [sp, #88]    u2_dest_stridey
//*                     [sp, #96]    u2_dest_strideuv
//*                     [sp, #104]   is_u_first
//*                     [sp, #112]   disable_luma_copy
//*
//*                     Stack offsets are relative to sp AFTER the prologue of
//*                     this function. All integer arguments are treated as
//*                     signed 32-bit values and sign-extended before use.
//*                     NOTE(review): the 8-byte spacing of the stack arguments
//*                     assumes the standard AAPCS64 stack-argument layout -
//*                     confirm for platforms that pack stack arguments.
//*
//*  Values Returned  : None
//*
//*  Register Usage   : x0 - x11, x15, x19, x20, v0, v1
//*
//*  Stack Usage      : 16 bytes for x19/x20 plus whatever push_v_regs saves
//*                     (the original stack-argument offsets imply 64 bytes,
//*                     i.e. 80 bytes in total - macro body is not in this file)
//*
//*  Interruptibility : Interruptible
//*
//*  Known Limitations
//*       Assumptions: Image Width:     Assumed to be a multiple of 2.
//*                     Image Height:    Assumed to be even.
//*                     Rows of 16 bytes or more use 16-byte NEON transfers; a
//*                     ragged tail is handled by stepping back and re-copying
//*                     the last 16 bytes of the row, so some bytes are written
//*                     twice with the same value.
//*                     Rows narrower than 16 bytes are copied byte by byte.
//*                     Zero or negative width/height copies nothing.
//*
//*  Revision History :
//*         DD MM YYYY   Author(s)       Changes (Describe the changes made)
//*         16 05 2012   Naveen SR     draft
//*
//*****************************************************************************/

.globl ihevcd_fmt_conv_420sp_to_420p_av8

.type ihevcd_fmt_conv_420sp_to_420p_av8, %function

// Offsets of the stack-passed arguments, measured from sp after the prologue.
.equ FMT_SP2P_SRC_UV_STRD,          80
.equ FMT_SP2P_DST_Y_STRD,           88
.equ FMT_SP2P_DST_UV_STRD,          96
.equ FMT_SP2P_IS_U_FIRST,           104
.equ FMT_SP2P_DISABLE_LUMA_COPY,    112

ihevcd_fmt_conv_420sp_to_420p_av8:
    // Prologue. The frame layout is kept exactly as before so that the
    // stack-argument offsets above remain valid.
    push_v_regs
    stp         x19, x20, [sp, #-16]!

    mov         x15, x4                     // x15 = pu1_dest_v (x4 is reused as row counter)

    // 32-bit arguments passed in registers: the upper halves of x5-x7 are
    // unspecified on entry, so extend explicitly before 64-bit arithmetic.
    sxtw        x8, w5                      // x8 = width
    sxtw        x9, w6                      // x9 = height
    sxtw        x7, w7                      // x7 = source Y stride

    // The loops below are do-while shaped; bail out early on empty images.
    cmp         x8, #0
    b.le        .Lfmt_sp2p_exit
    cmp         x9, #0
    b.le        .Lfmt_sp2p_exit

    ldrsw       x5, [sp, #FMT_SP2P_DST_Y_STRD]
    sub         x10, x7, x8                 // x10 = source Y gap from row end to next row
    sub         x11, x5, x8                 // x11 = destination Y gap

    ldr         w5, [sp, #FMT_SP2P_DISABLE_LUMA_COPY]
    cbnz        w5, .Lfmt_sp2p_uv_start     // non-zero flag: skip the luma copy

    // ------------------------------ Copy Y ------------------------------
    mov         x4, x9                      // x4 = rows remaining

.Lfmt_sp2p_y_row:
    mov         x6, x8                      // x6 = bytes remaining in this row
    cmp         x6, #16
    b.lt        .Lfmt_sp2p_y_bytes          // too narrow for a 16-byte transfer

.Lfmt_sp2p_y_col:
    sub         x6, x6, #16
    ld1         {v0.8b, v1.8b}, [x0], #16
    st1         {v0.8b, v1.8b}, [x2], #16
    cmp         x6, #16
    b.ge        .Lfmt_sp2p_y_col
    cbz         x6, .Lfmt_sp2p_y_row_end

    // Ragged tail (0 < x6 < 16): step both pointers back by (16 - x6) so that
    // one more 16-byte transfer ends exactly at the end of the row.
    // Example: width 162 -> 160 bytes done, pointers move back to byte 146.
    mov         x20, #16
    sub         x6, x20, x6                 // x6 = 16 - remaining
    sub         x0, x0, x6
    sub         x2, x2, x6
    ld1         {v0.8b, v1.8b}, [x0], #16
    st1         {v0.8b, v1.8b}, [x2], #16
    b           .Lfmt_sp2p_y_row_end

.Lfmt_sp2p_y_bytes:
    // Narrow row: stepping back would run before the start of the buffers,
    // so copy one byte at a time instead (x6 >= 1 here).
    ldrb        w19, [x0], #1
    subs        x6, x6, #1
    strb        w19, [x2], #1               // strb leaves the flags from subs intact
    b.gt        .Lfmt_sp2p_y_bytes

.Lfmt_sp2p_y_row_end:
    add         x0, x0, x10                 // advance to the next source row
    add         x2, x2, x11                 // advance to the next destination row
    subs        x4, x4, #1
    b.gt        .Lfmt_sp2p_y_row

    // ------------------------------ Copy UV -----------------------------
.Lfmt_sp2p_uv_start:
    ldrsw       x5, [sp, #FMT_SP2P_DST_UV_STRD]
    ldrsw       x7, [sp, #FMT_SP2P_SRC_UV_STRD]

    lsr         x9, x9, #1                  // x9 = chroma rows = height / 2
    cbz         x9, .Lfmt_sp2p_exit         // nothing to de-interleave

    sub         x10, x7, x8                 // source gap: each chroma row holds 'width' bytes
    lsr         x11, x8, #1
    sub         x11, x5, x11                // destination gap: each plane row holds width / 2 bytes

    // x3 receives the even source bytes, x5 the odd ones.
    mov         x5, x15                     // default: x3 = pu1_dest_u, x5 = pu1_dest_v
    ldr         w4, [sp, #FMT_SP2P_IS_U_FIRST]
    cbnz        w4, .Lfmt_sp2p_uv_rows
    mov         x5, x3                      // is_u_first == 0: swap the destinations
    mov         x3, x15

.Lfmt_sp2p_uv_rows:
    mov         x4, x9                      // x4 = rows remaining

.Lfmt_sp2p_uv_row:
    mov         x6, x8                      // x6 = interleaved bytes remaining in this row
    cmp         x6, #16
    b.lt        .Lfmt_sp2p_uv_pairs         // too narrow for a 16-byte transfer

.Lfmt_sp2p_uv_col:
    sub         x6, x6, #16
    prfm        PLDL1KEEP, [x1, #128]
    ld2         {v0.8b, v1.8b}, [x1], #16   // v0 = even bytes, v1 = odd bytes
    st1         {v0.8b}, [x3], #8
    st1         {v1.8b}, [x5], #8
    cmp         x6, #16
    b.ge        .Lfmt_sp2p_uv_col
    cbz         x6, .Lfmt_sp2p_uv_row_end

    // Ragged tail (0 < x6 < 16): step the source back by (16 - x6) bytes and
    // each destination by half of that, then redo one full 16-byte transfer.
    mov         x20, #16
    sub         x6, x20, x6                 // x6 = 16 - remaining
    sub         x1, x1, x6
    sub         x3, x3, x6, lsr #1
    sub         x5, x5, x6, lsr #1
    ld2         {v0.8b, v1.8b}, [x1], #16
    st1         {v0.8b}, [x3], #8
    st1         {v1.8b}, [x5], #8
    b           .Lfmt_sp2p_uv_row_end

.Lfmt_sp2p_uv_pairs:
    // Narrow row: split one interleaved byte pair per iteration
    // (width is assumed even, see the header).
    ldrb        w19, [x1], #1
    ldrb        w20, [x1], #1
    subs        x6, x6, #2
    strb        w19, [x3], #1               // stores do not disturb the flags
    strb        w20, [x5], #1
    b.gt        .Lfmt_sp2p_uv_pairs

.Lfmt_sp2p_uv_row_end:
    add         x1, x1, x10                 // advance to the next source row
    add         x3, x3, x11                 // advance both destination planes
    add         x5, x5, x11
    subs        x4, x4, #1
    b.gt        .Lfmt_sp2p_uv_row

.Lfmt_sp2p_exit:
    // Epilogue: undo the prologue in reverse order.
    ldp         x19, x20, [sp], #16
    pop_v_regs
    ret

.size ihevcd_fmt_conv_420sp_to_420p_av8, .-ihevcd_fmt_conv_420sp_to_420p_av8