; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHorVer
;--            function
;--
;-------------------------------------------------------------------------------


;// Assembler preamble: stack-alignment attributes and code section.
;// REQUIRE8/PRESERVE8 are emitted unless H264DEC_WINASM is defined.
    IF  :DEF: H264DEC_WINASM
        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE


;// h264bsdInterpolateChromaHorVer register allocation
;//
;// Several names alias the same physical register. Each pair is used at
;// different points of the function, so only one meaning is live at a time:
;//   r0  ref   (incoming argument)      / ptrA  (running source pointer)
;//   r1  mb    (destination pointer)    / block (argument to h264bsdFillBlock)
;//   r2  x0    (incoming argument)      / count (packed loop counters)
;//   r3  y0    (incoming argument)      / valY  (packed |yFrac|8-yFrac|)
;//   r5  height (bounds check / fill)   / tmp4  (loop temporary)
;//   r10 chrPW (chromaPartWidth)        / tmp5  (loop temporary)
;//   r11 chrPH (chromaPartHeight)       / tmp6  (loop temporary)
;//   r14 yFrac (while building valY)    / c32   (rounding constant 32)

ref     RN 0
ptrA    RN 0

mb      RN 1
block   RN 1

x0      RN 2
count   RN 2

y0      RN 3
valY    RN 3

width   RN 4

tmp4    RN 5
height  RN 5

tmp1    RN 6

tmp2    RN 7

tmp3    RN 8

valX    RN 9

tmp5    RN 10
chrPW   RN 10

tmp6    RN 11
chrPH   RN 11

xFrac   RN 12

c32     RN 14
yFrac   RN 14

;// function exports and imports

    IMPORT  h264bsdFillBlock

    EXPORT  h264bsdInterpolateChromaHorVer

;//  Function arguments
;//
;//  Offsets are relative to sp after the prologue (13 registers pushed,
;//  then 0xc4 bytes reserved):
;//
;//  u8 *ref,                   : 0xc4
;//  u8 *predPartChroma,        : 0xc8
;//  i32 x0,                    : 0xcc
;//  i32 y0,                    : 0xd0
;//  u32 width,                 : 0xf8
;//  u32 height,                : 0xfc
;//  u32 xFrac,                 : 0x100
;//  u32 yFrac,                 : 0x104
;//  u32 chromaPartWidth,       : 0x108
;//  u32 chromaPartHeight       : 0x10c
;//
;//  Stack frame:
;//    sp+0x00 .. sp+0x13 : five outgoing stack arguments for h264bsdFillBlock
;//    sp+0x1c ..         : scratch block written by h264bsdFillBlock
;//    sp+0xc4 .. sp+0xd3 : saved r0-r3 (ref, predPartChroma, x0, y0)
;//    sp+0xd4 .. sp+0xf7 : saved r4-r11, lr
;//    sp+0xf8 ..         : stack arguments passed by the caller
;//
;//  Operation:
;//    For both chroma planes (Cb, then Cr) each output pixel is
;//      v(x)  = (8-yFrac)*p(x,y) + yFrac*p(x,y+1)
;//      out   = ((8-xFrac)*v(x) + xFrac*v(x+1) + 32) >> 6
;//    Cb is read from ref and written to predPartChroma with a row stride of
;//    8 bytes; Cr is read from ref + width*height and written to
;//    predPartChroma + 64.
;//    If the (chromaPartWidth+1) x (chromaPartHeight+1) source window does
;//    not lie inside the width x height plane, both planes are first copied
;//    into the scratch block with h264bsdFillBlock and interpolation reads
;//    from there instead.
;//    NOTE(review): h264bsdFillBlock is external; it presumably replicates
;//    edge pixels for out-of-plane coordinates - confirm against its source.
;//    NOTE(review): pixels are produced in 2x2 groups, so the partition
;//    dimensions are presumably even - confirm against the callers.

h264bsdInterpolateChromaHorVer
    STMFD   sp!, {r0-r11,lr}
    SUB     sp, sp, #0xc4

    LDR     chrPW, [sp, #0x108]     ;// chromaPartWidth
    LDR     width, [sp, #0xf8]      ;// width
    CMP     x0, #0
    BLT     do_fill

    ADD     tmp1, x0, chrPW         ;// tmp1 = x0+ chromaPartWidth
    ADD     tmp1, tmp1, #1          ;// tmp1 = x0+ chromaPartWidth+1
    CMP     tmp1, width             ;// x0+chromaPartWidth+1 > width
    BHI     do_fill

    CMP     y0, #0
    BLT     do_fill
    LDR     chrPH, [sp, #0x10c]     ;// chromaPartHeight
    LDR     height, [sp, #0xfc]     ;// height
    ADD     tmp1, y0, chrPH         ;// tmp1 = y0 + chromaPartHeight
    ADD     tmp1, tmp1, #1          ;// tmp1 = y0 + chromaPartHeight + 1
    CMP     tmp1, height
    BLS     skip_fill               ;// window fully inside: read ref directly

do_fill
    ;// r0-r3 still hold ref, (unused) predPartChroma, x0, y0 on entry here.
    LDR     chrPH, [sp, #0x10c]     ;// chromaPartHeight
    LDR     height, [sp, #0xfc]     ;// height
    ADD     tmp3, chrPW, #1         ;// tmp3 = chromaPartWidth+1
    ADD     tmp1, chrPW, #1         ;// tmp1 = chromaPartWidth+1
    ADD     tmp2, chrPH, #1         ;// tmp2 = chromaPartHeight+1
    STMIA   sp,{width,height,tmp1,tmp2,tmp3} ;// stack args of h264bsdFillBlock
    ADD     block, sp, #0x1c        ;// block
    BL      h264bsdFillBlock        ;// first plane (Cb)

    ;// r0-r3 were clobbered by the call; reload them from the saved copies.
    ;// The code relies on r4-r11 being preserved by the callee.
    LDR     x0, [sp, #0xcc]
    LDR     y0, [sp, #0xd0]
    LDR     ref, [sp, #0xc4]        ;// ref
    STMIA   sp,{width,height,tmp1,tmp2,tmp3} ;// rewrite args for second call
    ADD     block, sp, #0x1c        ;// block
    MLA     ref, height, width, ref ;// ref += width * height;
    MLA     block, tmp2, tmp1, block;// block + (chromaPW+1)*(chromaPH+1)
    BL      h264bsdFillBlock        ;// second plane (Cr)

    ;// From here on the scratch block is the reference picture: origin
    ;// (0,0), dimensions (chromaPartWidth+1) x (chromaPartHeight+1).
    ;// The saved copies are updated too because the Cr pass reloads them.
    MOV     x0, #0                  ;// x0 = 0
    MOV     y0, #0                  ;// y0 = 0
    STR     x0, [sp, #0xcc]
    STR     y0, [sp, #0xd0]
    ADD     ref, sp, #0x1c          ;// ref = block
    STR     ref, [sp, #0xc4]        ;// ref

    STR     tmp2, [sp, #0xfc]       ;// height
    STR     tmp1, [sp, #0xf8]       ;// width
    MOV     width, tmp1

skip_fill
    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
    LDR     yFrac, [sp, #0x104]     ;// yFrac
    LDR     xFrac, [sp, #0x100]
    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
    RSB     valX, xFrac, #8         ;// valX = 8-xFrac
    RSB     valY, yFrac, #8         ;// valY = 8-yFrac

    LDR     mb, [sp, #0xc8]         ;// predPartChroma


    ;// pack values to count register
    ;// [31:28] loop_x (chromaPartWidth-1)
    ;// [27:24] loop_y (chromaPartHeight-1)
    ;// [23:20] chromaPartWidth-1
    ;// [19:16] chromaPartHeight-1
    ;// [15:00] unused (zero)

    SUB     tmp2, chrPH, #1             ;// chromaPartHeight-1
    SUB     tmp1, chrPW, #1             ;// chromaPartWidth-1
    ;// count shares r2 with x0, which is no longer needed. Start from a
    ;// clean value so a stale x0 can never leak into the counter fields.
    MOV     count, tmp2, LSL #16        ;// chromaPartHeight-1
    ADD     count, count, tmp2, LSL #24 ;// loop_y
    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
    AND     tmp2, count, #0x00F00000    ;// loop_x
    PKHBT   valY, valY, yFrac, LSL #16  ;// |yFrac|valY |
    MOV     c32, #32                    ;// rounding term; yFrac (r14) is dead


    ;///////////////////////////////////////////////////////////////////////////
    ;// Cb
    ;///////////////////////////////////////////////////////////////////////////

    ;// 2x2 pels per iteration
    ;// bilinear vertical and horizontal interpolation

loop1_y
    ;// Prime the left column for rows y, y+1, y+2 of this row pair.
    LDRB    tmp1, [ptrA]
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp5, [ptrA, width, LSL #1]

    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|

    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)

    ADD     count, count, tmp2, LSL #8  ;// reload loop_x = chromaPartWidth-1
loop1_x
    ;// first
    LDRB    tmp2, [ptrA, #1]!
    LDRB    tmp4, [ptrA, width]
    LDRB    tmp6, [ptrA, width, LSL #1]

    PKHBT   tmp2, tmp2, tmp4, LSL #16   ;// |t4|t2|
    PKHBT   tmp4, tmp4, tmp6, LSL #16   ;// |t6|t4|

    SMUAD   tmp2, tmp2, valY            ;// t2=(t2*valY + t4*yFrac)
    MLA     tmp5, tmp1, valX, c32       ;// t5=t1*valX+32
    MLA     tmp5, tmp2, xFrac, tmp5     ;// t5=t2*xFrac+t5

    SMUAD   tmp4, tmp4, valY            ;// t4=(t4*valY + t6*yFrac)
    MLA     tmp6, tmp3, valX, c32       ;// t6=t3*valX+32
    MLA     tmp6, tmp4, xFrac, tmp6     ;// t6=t4*xFrac+t6

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb, #8]              ;// store pixel (next output row)
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb], #1              ;// store pixel

    ;// second: the column just loaded (t2/t4) becomes the left neighbour
    LDRB    tmp1, [ptrA, #1]!
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp5, [ptrA, width, LSL #1]

    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|

    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
    MLA     tmp5, tmp1, xFrac, c32      ;// t5=t1*xFrac+32
    MLA     tmp5, tmp2, valX, tmp5      ;// t5=t2*valX+t5

    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
    MLA     tmp6, tmp3, xFrac, c32      ;// t6=t3*xFrac+32
    MLA     tmp6, tmp4, valX, tmp6      ;// t6=t4*valX+t6

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb, #8]              ;// store pixel (next output row)
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb], #1              ;// store pixel

    SUBS    count, count, #2<<28        ;// loop_x -= 2, borrow ends the row
    BCS     loop1_x

    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1) << 20

    ;// ADDS is used for its carry-out, which is clear as long as the pointer
    ;// add does not wrap. With C clear each SBC subtracts one extra, i.e.
    ;// (chromaPartWidth-1)+1 = chromaPartWidth, rewinding to the row start.
    ;// The ADD in between must not set flags.
    ADDS    mb, mb, #16                 ;// two output rows down (stride 8)
    SBC     mb, mb, tmp2, LSR #20
    ADD     ptrA, ptrA, width, LSL #1   ;// two source rows down
    SBC     ptrA, ptrA, tmp2, LSR #20

    ;// Adding 0xE to the loop_y nibble subtracts 2 from it; while no borrow
    ;// occurs the carry wraps the 0xF left in [31:28] to 0 (result >= 0).
    ADDS    count, count, #0xE << 24
    BGE     loop1_y

    ;///////////////////////////////////////////////////////////////////////////
    ;// Cr
    ;///////////////////////////////////////////////////////////////////////////
    LDR     height, [sp,#0xfc]          ;// height
    LDR     ref, [sp, #0xc4]            ;// ref
    LDR     tmp1, [sp, #0xd0]           ;// y0
    LDR     tmp2, [sp, #0xcc]           ;// x0
    LDR     mb, [sp, #0xc8]             ;// predPartChroma

    ADD     tmp1, height, tmp1          ;// skip the first plane: y0 + height
    MLA     tmp3, tmp1, width, tmp2     ;// tmp3 = (y0+height)*width+x0
    ADD     ptrA, ref, tmp3
    ADD     mb, mb, #64                 ;// second output plane

    AND     count, count, #0x00FFFFFF   ;// clear loop_x/loop_y
    AND     tmp1, count, #0x000F0000
    ADD     count, count, tmp1, LSL #8  ;// loop_y = chromaPartHeight-1
    AND     tmp2, count, #0x00F00000    ;// loop_x

    ;// 2x2 pels per iteration
    ;// bilinear vertical and horizontal interpolation
loop2_y
    ;// Prime the left column for rows y, y+1, y+2 of this row pair.
    LDRB    tmp1, [ptrA]
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp5, [ptrA, width, LSL #1]

    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|

    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)

    ADD     count, count, tmp2, LSL #8  ;// reload loop_x = chromaPartWidth-1
loop2_x
    ;// first
    LDRB    tmp2, [ptrA, #1]!
    LDRB    tmp4, [ptrA, width]
    LDRB    tmp6, [ptrA, width, LSL #1]

    PKHBT   tmp2, tmp2, tmp4, LSL #16   ;// |t4|t2|
    PKHBT   tmp4, tmp4, tmp6, LSL #16   ;// |t6|t4|

    SMUAD   tmp2, tmp2, valY            ;// t2=(t2*valY + t4*yFrac)
    MLA     tmp5, tmp1, valX, c32       ;// t5=t1*valX+32
    MLA     tmp5, tmp2, xFrac, tmp5     ;// t5=t2*xFrac+t5

    SMUAD   tmp4, tmp4, valY            ;// t4=(t4*valY + t6*yFrac)
    MLA     tmp6, tmp3, valX, c32       ;// t6=t3*valX+32
    MLA     tmp6, tmp4, xFrac, tmp6     ;// t6=t4*xFrac+t6

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb, #8]              ;// store pixel (next output row)
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb], #1              ;// store pixel

    ;// second: the column just loaded (t2/t4) becomes the left neighbour
    LDRB    tmp1, [ptrA, #1]!
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp5, [ptrA, width, LSL #1]

    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|

    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
    MLA     tmp5, tmp1, xFrac, c32      ;// t5=t1*xFrac+32
    MLA     tmp5, tmp2, valX, tmp5      ;// t5=t2*valX+t5

    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
    MLA     tmp6, tmp3, xFrac, c32      ;// t6=t3*xFrac+32
    MLA     tmp6, tmp4, valX, tmp6      ;// t6=t4*valX+t6

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb, #8]              ;// store pixel (next output row)
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb], #1              ;// store pixel

    SUBS    count, count, #2<<28        ;// loop_x -= 2, borrow ends the row
    BCS     loop2_x

    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1) << 20

    ;// Same carry trick as in the Cb pass: ADDS leaves C clear, so each SBC
    ;// subtracts chromaPartWidth in total.
    ADDS    mb, mb, #16                 ;// two output rows down (stride 8)
    SBC     mb, mb, tmp2, LSR #20
    ADD     ptrA, ptrA, width, LSL #1   ;// two source rows down
    SBC     ptrA, ptrA, tmp2, LSR #20

    ADDS    count, count, #0xE << 24    ;// loop_y -= 2 (see Cb pass)
    BGE     loop2_y

    ADD     sp,sp,#0xd4                 ;// drop locals and the saved r0-r3
    LDMFD   sp!,{r4-r11,pc}

    END