18d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@/******************************************************************************
28d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
38d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * Copyright (C) 2015 The Android Open Source Project
48d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
58d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * Licensed under the Apache License, Version 2.0 (the "License");
68d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * you may not use this file except in compliance with the License.
78d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * You may obtain a copy of the License at:
88d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
98d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * http://www.apache.org/licenses/LICENSE-2.0
108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * Unless required by applicable law or agreed to in writing, software
128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * distributed under the License is distributed on an "AS IS" BASIS,
138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * See the License for the specific language governing permissions and
158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * limitations under the License.
168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *****************************************************************************
188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*/
208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S.text
228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S.p2align 2
238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@/*****************************************************************************
258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
@*  Function Name    : ih264e_fmt_conv_420p_to_420sp_a9q()                    *
@*                                                                            *
@*  Description      : This function converts the image from YUV420P color    *
@*                     space to 420SP color space (UV interleaved).           *
308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*  Arguments        : R0           pu1_y                                     *
328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     R1           pu1_u                                     *
338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     R2           pu1_v                                     *
348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     R3           pu1_dest_y                                *
358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #40]    pu1_dest_uv                               *
368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #44]    u2_height                                 *
378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #48]    u2_width                                  *
388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #52]    u2_stridey                                *
398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #56]    u2_strideu                                *
408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #60]    u2_stridev                                *
418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #64]    u2_dest_stride_y                          *
428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #68]    u2_dest_stride_uv                         *
438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     [R13 #72]    convert_uv_only                           *
448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*  Values Returned  : None                                                   *
468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*  Register Usage   : R0 - R14                                               *
488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*  Stack Usage      : 40 Bytes                                               *
508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*  Interruptibility : Interruptible                                          *
528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*  Known Limitations                                                         *
548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*       Assumptions: Image Width:     Assumed to be multiple of 16 and       *
558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     greater than or equal to 16                *
568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                     Image Height:    Assumed to be even.                   *
578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*  Revision History :                                                        *
598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*         07 06 2010   Varshita        Draft                                 *
618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*         07 06 2010   Naveen Kr T     Completed                             *
628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*                                                                            *
638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*****************************************************************************/
648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    .global ih264e_fmt_conv_420p_to_420sp_a9q
658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Sih264e_fmt_conv_420p_to_420sp_a9q:
678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @// push the registers on the stack
698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    stmfd         sp!, {r4-r12, lr}
708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r4, [sp, #72]         @// Load convert_uv_only
728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmp           r4, #1
748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    beq           yuv420sp_uv_chroma
758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @/* Do the preprocessing before the main loops start */
768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @// Load the parameters from stack
778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r4, [sp, #44]         @// Load u2_height from stack
788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r5, [sp, #48]         @// Load u2_width from stack
798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r7, [sp, #52]         @// Load u2_stridey from stack
808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r8, [sp, #64]         @// Load u2_dest_stride_y from stack
818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r7, r7, r5            @// Source increment
828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r8, r8, r5            @// Destination increment
838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Syuv420sp_uv_row_loop_y:
858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov           r6, r5
868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Syuv420sp_uv_col_loop_y:
888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pld           [r0, #128]
898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vld1.8        {d0, d1}, [r0]!
908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vst1.8        {d0, d1}, [r3]!
918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r6, r6, #16
928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmp           r6, #15
938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bgt           yuv420sp_uv_col_loop_y
948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmp           r6, #0
968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    beq           yuv420sp_uv_row_loop_end_y
978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @//Ex if width is 162, above loop will process 160 pixels. And
998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @//Both source and destination will point to 146th pixel and then 16 bytes will be read
1008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @// and written using VLD1 and VST1
1018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    rsb           r6, r6, #16
1028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r0, r0, r6
1038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r3, r3, r6
1048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vld1.8        {d0, d1}, [r0]!
1068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vst1.8        {d0, d1}, [r3]!
1078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Syuv420sp_uv_row_loop_end_y:
1098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r0, r0, r7
1108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r3, r3, r8
1118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    subs          r4, r4, #1
1128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bgt           yuv420sp_uv_row_loop_y
1138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Syuv420sp_uv_chroma:
1158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r3, [sp, #40]         @// Load pu1_dest_uv from stack
1178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r4, [sp, #44]         @// Load u2_height from stack
1198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r5, [sp, #48]         @// Load u2_width from stack
1218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r7, [sp, #56]         @// Load u2_strideu from stack
1248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r8, [sp, #68]         @// Load u2_dest_stride_uv from stack
1268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r7, r7, r5, lsr #1    @// Source increment
1288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r8, r8, r5            @// Destination increment
1308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov           r5, r5, lsr #1
1328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov           r4, r4, lsr #1
1338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r3, [sp, #40]         @// Load pu1_dest_uv from stack
134c72323e7234ceda6c2c5be5bc2622d87cd4fbaf5Harish Mahendrakar
1358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Syuv420sp_uv_row_loop_uv:
1368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov           r6, r5
1378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Syuv420sp_uv_col_loop_uv:
1408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pld           [r1, #128]
1418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pld           [r2, #128]
1428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vld1.8        d0, [r1]!
1438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vld1.8        d1, [r2]!
1448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vst2.8        {d0, d1}, [r3]!
1458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r6, r6, #8
1468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmp           r6, #7
1478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bgt           yuv420sp_uv_col_loop_uv
1488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    cmp           r6, #0
1508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    beq           yuv420sp_uv_row_loop_end_uv
1518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
1528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @//Ex if width is 162, above loop will process 160 pixels. And
1538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @//Both source and destination will point to 146th pixel and then 16 bytes will be read
1548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @// and written using VLD1 and VST1
1558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    rsb           r6, r6, #8
1568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r1, r1, r6
1578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r2, r2, r6
1588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r3, r3, r6, lsl #1
1598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vld1.8        d0, [r1]!
1618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vld1.8        d1, [r2]!
1628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vst2.8        {d0, d1}, [r3]!
1638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Syuv420sp_uv_row_loop_end_uv:
1658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r1, r1, r7
1668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r2, r2, r7
1678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r3, r3, r8
1688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    subs          r4, r4, #1
1698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    bgt           yuv420sp_uv_row_loop_uv
1708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @//POP THE REGISTERS
1718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldmfd         sp!, {r4-r12, pc}
1728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ /**
1788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *******************************************************************************
1798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
1808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @brief ih264e_fmt_conv_422i_to_420sp_a9q
1818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *     Function used from format conversion or frame copy
1828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
1838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
1848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
1858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *Inputs             : r0 - pu1_y            -   UWORD8 pointer to y plane.
1868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *                     r1 - pu1_u            -   UWORD8 pointer to u plane.
@ *                     r2 - pu1_v            -   UWORD8 pointer to v plane.
1888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *                     r3 - pu2_yuv422i      -   UWORD16 pointer to yuv422iimage.
1898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *             stack + 40 - u4_width         -   Width of the Y plane.
1908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *                     44 - u4_height        -   Height of the Y plane.
1918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *                     48 - u4_stride_y      -   Stride in pixels of Y plane.
1928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *                     52 - u4_stride_u      -   Stride in pixels of U plane.
1938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *                     56 - u4_stride_v      -   Stride in pixels of V plane.
1948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *                     60 - u4_stride_yuv422i-   Stride in pixels of yuv422i image.
1958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
1968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @par   Description
1978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * Function used from copying or converting a reference frame to display buffer
1988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * in non shared mode
1998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @param[in] pu1_y_dst
2018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   Output Y pointer
2028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @param[in] pu1_u_dst
2048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   Output U/UV pointer ( UV is interleaved in the same format as that of input)
2058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @param[in] pu1_v_dst
2078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   Output V pointer ( used in 420P output case)
2088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @param[in] u4_dst_y_strd
2108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   Stride of destination Y buffer
2118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @param[in] u4_dst_u_strd
2138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   Stride of destination  U/V buffer
2148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @param[in] blocking
2178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   To indicate whether format conversion should wait till frame is reconstructed
2188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   and then return after complete copy is done. To be set to 1 when called at the
2198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   end of frame processing and set to 0 when called between frame processing modules
2208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *   in order to utilize available MCPS
2218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @returns Error from IH264E_ERROR_T
2238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * @remarks
2258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * Assumes that the stride of U and V buffers are same.
2268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * This is correct in most cases
@ * If a case comes where this is not true we need to modify the fmt conversion functions called inside also
@ * Since we read 4 pixels at a time the width should be aligned to 4
2298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * In assembly width should be aligned to 16 and height to 2.
2308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ * Revision History :
2338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *         DD MM YYYY   Author(s)              Changes (Describe the changes made)
@ *         07 06 2010   Harinarayanan K K       Adapted to 422p
2358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *
2368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ *******************************************************************************
2378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@ */
2388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@//`
2408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@*/
2418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    .global ih264e_fmt_conv_422i_to_420sp_a9q
2428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Sih264e_fmt_conv_422i_to_420sp_a9q:
2438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    stmfd         sp!, {r4-r12, lr}     @// Back the register which are used
2448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @/* Do the preprocessing before the main loops start */
2488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @// Load the parameters from stack
2498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r4, [sp, #48]         @// Load u4_stride_y       from stack
2508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r5, [sp, #60]         @// Load u4_stride_yuv422i from stack
2528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r6, r0, r4            @// pu1_y_nxt_row       = pu1_y + u4_stride_y
2538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r7, [sp, #40]         @// Load u4_width          from stack
2558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r8, r3, r5, lsl #1    @// pu2_yuv422i_nxt_row = pu2_yuv422i_y + u4_stride_yuv422i(2 Bytes for each pixel)
2568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r9, [sp, #52]         @// Load u4_stride_u       from stack
2588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r12, r4, r7           @// u2_offset1          = u4_stride_y - u4_width
2598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@LDR            r10,[sp,#56]                ;// Load u4_stride_v       from stack
2618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r14, r5, r7           @// u2_offset_yuv422i   = u4_stride_yuv422i - u4_width
2628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldr           r11, [sp, #44]        @// Load u4_height         from stack
2648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    sub           r9, r9, r7            @// u2_offset2          = u4_stride_u - u4_width >> 1
2658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@   SUB         r10,r10,r7,ASR #1           ;// u2_offset3          = u4_stride_v - u4_width >> 1
2678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov           r14, r14, lsl #1      @// u2_offset_yuv422i   = u2_offset_yuv422i * 2
2688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov           r11, r11, asr #1      @// u4_width = u4_width / 2 (u4_width >> 1)
2708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r4, r12, r4           @// u2_offset1 = u2_offset1 + u4_stride_y
2728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r5, r14, r5, lsl #1   @// u2_offset_yuv422i = u2_offset_yuv422i + u4_stride_yuv422i
2738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// Register Assignment
2758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// pu1_y               - r0
2768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// pu1_y_nxt_row       - r6
2778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// pu1_u               - r1
2788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// pu1_v               - r2
2798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// pu2_yuv422i         - r3
2808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// pu2_yuv422i_nxt_row - r8
2818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// u2_offset1          - r4
2828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// u2_offset2          - r9
2838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// u2_offset3          - r10
2848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// u2_offset_yuv422i   - r5
2858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// u4_width / 16       - r7
2868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// u4_height / 2       - r11
2878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S@// inner loop count    - r12
288816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjoyuv422i_to_420sp_height_loop:
2898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mov           r12, r7               @// Inner loop count = u4_width / 16
2918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
292816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjoyuv422i_to_420sp_width_loop:
2938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vld4.8        {d0, d1, d2, d3}, [r3]! @// Load the 16 elements of row 1
2948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vld4.8        {d4, d5, d6, d7}, [r8]! @// Load the 16 elements of row 2
295816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    sub           r12, r12, #16
2968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vrhadd.u8     d0, d0, d4
2988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vrhadd.u8     d2, d2, d6
2998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vst2.8        {d1, d3}, [r0]!       @// Store the 16 elements of row1 Y
3018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vst2.8        {d5, d7}, [r6]!       @// Store the 16 elements of row2 Y
3028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    vst2.8        {d0, d2}, [r1]!       @// Store the 8 elements of row1/2 U
3048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
305816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    cmp           r12, #15
306816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    bgt           yuv422i_to_420sp_width_loop
307816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    cmp           r12, #0
308816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    beq           yuv422i_to_420sp_row_loop_end
3098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
310816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    @//If non-multiple of 16, then go back by few bytes to ensure 16 bytes can be read
311816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    @//Ex if width is 162, above loop will process 160 pixels. And
312816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    @//Both source and destination will point to 146th pixel and then 16 bytes will be read
313816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    @// and written using VLD1 and VST1
314816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    rsb           r12, r12, #16
315816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    sub           r3, r3, r12, lsl #1
316816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    sub           r8, r8, r12, lsl #1
317816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    sub           r0, r0, r12
318816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    sub           r6, r6, r12
319816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    sub           r1, r1, r12
320816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo
321816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    vld4.8        {d0, d1, d2, d3}, [r3]! @// Load the 16 elements of row 1
322816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    vld4.8        {d4, d5, d6, d7}, [r8]! @// Load the 16 elements of row 2
323816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo
324816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    vrhadd.u8     d0, d0, d4
325816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    vrhadd.u8     d2, d2, d6
326816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo
327816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    vst2.8        {d1, d3}, [r0]!       @// Store the 16 elements of row1 Y
328816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    vst2.8        {d5, d7}, [r6]!       @// Store the 16 elements of row2 Y
329816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo
330816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    vst2.8        {d0, d2}, [r1]!       @// Store the 8 elements of row1/2 U
331816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo
332816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjoyuv422i_to_420sp_row_loop_end:
3338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    @// Update the buffer pointer so that they will refer to next pair of rows
3348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r0, r0, r4            @// pu1_y               = pu1_y                 + u2_offset1
3358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r6, r6, r4            @// pu1_y_nxt_row       = pu1_y_nxt_row         + u2_offset1
3368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r1, r1, r9            @// pu1_u               = pu1_u                 + u2_offset2
3388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    subs          r11, r11, #1
3398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r3, r3, r5            @// pu2_yuv422i         = pu2_yuv422i           + u2_offset_yuv422i
3418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    add           r8, r8, r5            @// pu2_yuv422i_nxt_row = pu2_yuv422i_nxt_row   + u2_offset_yuv422i
343816974b18c6c216c724c8374c02b8e20adae7e91Martin Storsjo    bgt           yuv422i_to_420sp_height_loop
3448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    ldmfd         sp!, {r4-r12, pc}     @// Restore the register which are used
3458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
348