; h264bsd_interpolate_chroma_ver.s (revision 0c1bc742181ded4930842b46e9507372f0b1b963, James Dong)
; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaVer function
;--
;-------------------------------------------------------------------------------


;// Build configuration and code section.
;// REQUIRE8/PRESERVE8 are the armasm directives that declare that this file
;// requires, and preserves, 8-byte stack alignment. They are skipped when
;// H264DEC_WINASM is defined.
    IF :DEF: H264DEC_WINASM
        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE

;// h264bsdInterpolateChromaVer register allocation
;//
;// Several names alias the same physical register; each pair is used in a
;// different phase of the function (argument/setup phase vs. the pixel loops).

;// r0: reference pointer on entry, then the running source pixel pointer
ref     RN 0
ptrA    RN 0

;// r1: destination pointer in the loops / scratch-block pointer for the
;//     h264bsdFillBlock calls
mb      RN 1
block   RN 1

;// r2: x0 on entry, then the packed loop counters
x0      RN 2
count   RN 2

;// r3: y0 on entry, then the packed interpolation weights
y0      RN 3
valY    RN 3

;// r4: row stride of the source being read
width   RN 4

;// r5: height during setup, temporary inside the loops
height  RN 5
tmp7    RN 5

;// r6: chromaPartWidth during setup, temporary inside the loops
chrPW   RN 6
tmp8    RN 6

tmp1    RN 7

tmp2    RN 8

tmp3    RN 9

tmp4    RN 10

;// r11: chromaPartHeight during setup, temporary inside the loops
tmp5    RN 11
chrPH   RN 11

tmp6    RN 12

;// r14 is lr: it is saved by the prologue, so it is free to hold yFrac and
;// then the rounding constant 32
c32     RN 14
yFrac   RN 14

;// Function exports and imports

    IMPORT  h264bsdFillBlock            ;// called from the do_fill path

    EXPORT  h264bsdInterpolateChromaVer

;//-----------------------------------------------------------------------------
;// h264bsdInterpolateChromaVer
;//
;// Vertical bilinear interpolation of a chroma partition, first for Cb and
;// then for Cr. Each output pel is
;//
;//      out = (((8 - yFrac) * upper + yFrac * lower) * 8 + 32) >> 6
;//
;// where 'upper' and 'lower' are vertically adjacent source pels.
;//
;// Syntax : ARM armasm, ARMv6 (uses PKHBT / SMLAD)
;//
;// Function arguments and their location in the stack frame once the
;// prologue has run (STMFD of 13 registers + 0xc4 bytes of locals):
;//
;//  u8 *ref,                   : 0xc4   (r0 on entry)
;//  u8 *predPartChroma,        : 0xc8   (r1 on entry)
;//  i32 x0,                    : 0xcc   (r2 on entry)
;//  i32 y0,                    : 0xd0   (r3 on entry)
;//  u32 width,                 : 0xf8
;//  u32 height,                : 0xfc
;//  u32 yFrac,                 : 0x100
;//  u32 chromaPartWidth,       : 0x104
;//  u32 chromaPartHeight       : 0x108
;//
;// Other frame contents:
;//  sp+0x00 .. sp+0x13 : outgoing stack arguments for h264bsdFillBlock
;//  sp+0x1c .. sp+0xc3 : scratch block (0xa8 bytes) for the fill path
;//  sp+0xd4 .. sp+0xf7 : saved r4-r11, lr
;//
;// Output is written to predPartChroma with a fixed row stride of 8 bytes;
;// the Cr result starts 64 bytes after the Cb result.
;//
;// NOTE(review): the loop counters are 4-bit fields and the scratch block is
;// 0xa8 bytes, so this code appears to assume chromaPartWidth and
;// chromaPartHeight are even and at most 8 - confirm against callers.
;//-----------------------------------------------------------------------------

h264bsdInterpolateChromaVer
    STMFD   sp!, {r0-r11,lr}        ;// r0-r3 are stored so the arguments
                                    ;// can be reloaded / overwritten later
    SUB     sp, sp, #0xc4

    ;// Decide whether the source window lies completely inside the
    ;// reference area. If not, go through do_fill.
    LDR     chrPW, [sp, #0x104]     ;// chromaPartWidth
    LDR     width, [sp, #0xf8]      ;// width
    CMP     x0, #0
    BLT     do_fill

    ADD     tmp1, x0, chrPW         ;// tmp1 = x0+ chromaPartWidth
    CMP     tmp1, width             ;// x0+chromaPartWidth > width
    BHI     do_fill

    CMP     y0, #0
    BLT     do_fill
    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
    LDR     height, [sp, #0xfc]     ;// height
    ADD     tmp1, y0, chrPH         ;// tmp1 = y0 + chromaPartHeight
    ADD     tmp1, tmp1, #1          ;// tmp1 = y0 + chromaPartHeight + 1
                                    ;// (+1: one extra source row is read
                                    ;// for the vertical filter)
    CMP     tmp1, height
    BLS     skip_fill

do_fill
    ;// chrPH/height are (re)loaded here because this label is also reached
    ;// before the loads above have executed.
    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
    LDR     height, [sp, #0xfc]     ;// height
    ADD     tmp1, chrPH, #1         ;// tmp1 = chromaPartHeight+1
    MOV     tmp2, chrPW             ;// tmp2 = chromaPartWidth

    ;// First call: r0 = ref, r1 = scratch block, r2 = x0, r3 = y0,
    ;// stack = {width, height, chrPW, chrPH+1, chrPW}
    STMIA   sp,{width,height,chrPW,tmp1,tmp2}
    ADD     block, sp, #0x1c        ;// block
    BL      h264bsdFillBlock

    ;// Second call: same stack arguments, but the source is advanced by
    ;// width*height bytes and the destination by chrPW*(chrPH+1) bytes.
    ;// r0-r3 must be reloaded because the call may have clobbered them.
    LDR     x0, [sp, #0xcc]
    LDR     y0, [sp, #0xd0]
    LDR     ref, [sp, #0xc4]        ;// ref
    STMIA   sp,{width,height,chrPW,tmp1,tmp2}
    ADD     block, sp, #0x1c        ;// block
    MLA     ref, height, width, ref ;// ref += width * height;
    MLA     block, chrPW, tmp1, block;// block + (chromaPW)*(chromaPH+1)
    BL      h264bsdFillBlock

    ;// From here on the scratch block is the reference: rewrite the saved
    ;// arguments so that both the Cb and the Cr pass read from it.
    MOV     x0, #0                  ;// x0 = 0
    MOV     y0, #0                  ;// y0 = 0
    STR     x0, [sp, #0xcc]
    STR     y0, [sp, #0xd0]
    ADD     ref, sp, #0x1c          ;// ref = block
    STR     ref, [sp, #0xc4]        ;// ref

    STR     tmp1, [sp, #0xfc]       ;// height = chromaPartHeight+1
    STR     chrPW, [sp, #0xf8]      ;// width  = chromaPartWidth
    MOV     width, chrPW

skip_fill
    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
    LDR     yFrac, [sp, #0x100]     ;// yFrac
    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
    RSB     valY, yFrac, #8         ;// valY = 8-yFrac

    LDR     mb, [sp, #0xc8]         ;// predPartChroma


    ;// pack values to count register
    ;// [31:28] loop_x (chromaPartWidth-1)
    ;// [27:24] loop_y (chromaPartHeight-1)
    ;// [23:20] chromaPartWidth-1
    ;// [19:16] chromaPartHeight-1
    ;// [15:00] nothing
    ;//
    ;// count shares r2 with x0. x0 has already been consumed by the MLA
    ;// above, so the first field is written with MOV; this keeps a large x0
    ;// from leaking into the counter fields.

    SUB     tmp2, chrPH, #1             ;// chromaPartHeight-1
    SUB     tmp1, chrPW, #1             ;// chromaPartWidth-1
    MOV     count, tmp2, LSL #16        ;// chromaPartHeight-1 (x0 discarded)
    ADD     count, count, tmp2, LSL #24 ;// loop_y
    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
    AND     tmp2, count, #0x00F00000    ;// loop_x
    PKHBT   valY, valY, yFrac, LSL #16  ;// |yFrac|valY |
    MOV     valY, valY, LSL #3          ;// multiply by 8 in advance
    MOV     c32, #32                    ;// rounding term; overwrites yFrac,
                                        ;// which is now packed in valY


    ;///////////////////////////////////////////////////////////////////////////
    ;// Cb
    ;///////////////////////////////////////////////////////////////////////////

    ;// 2x2 pels per iteration
    ;// bilinear vertical interpolation

loop1_y
    ADD     count, count, tmp2, LSL #8  ;// reload loop_x = chromaPartWidth-1
loop1_x
    ;// Process 2x2 block
    LDRB    tmp2, [ptrA,width]          ;// 2 row, 1 col
    LDRB    tmp3, [ptrA,width, LSL #1]  ;// 3 row, 1 col
    LDRB    tmp1, [ptrA],#1             ;// 1 row, 1 col

    LDRB    tmp5, [ptrA,width]          ;// 2 row, 2 col
    LDRB    tmp6, [ptrA,width, LSL #1]  ;// 3 row, 2 col
    LDRB    tmp4, [ptrA],#1             ;// 1 row, 2 col

    PKHBT   tmp1, tmp1, tmp2, LSL #16   ;// |B|A|
    PKHBT   tmp2, tmp2, tmp3, LSL #16   ;// |C|B|
    PKHBT   tmp4, tmp4, tmp5, LSL #16   ;// |B|A|

    ;// SMLAD: lo*lo + hi*hi + 32, i.e. upper*(8-yFrac)*8 + lower*yFrac*8 + 32
    SMLAD   tmp7, tmp2, valY, c32       ;// multiply
    PKHBT   tmp5, tmp5, tmp6, LSL #16   ;// |C|B|
    SMLAD   tmp2, tmp1, valY, c32       ;// multiply
    SMLAD   tmp8, tmp5, valY, c32       ;// multiply
    SMLAD   tmp5, tmp4, valY, c32       ;// multiply

    MOV     tmp7, tmp7, LSR #6          ;// scale down
    STRB    tmp7, [mb,#8]               ;// store row 2 col 1
    MOV     tmp2, tmp2, LSR #6          ;// scale down
    STRB    tmp2, [mb],#1               ;// store row 1 col 1

    MOV     tmp8, tmp8, LSR #6          ;// scale down
    STRB    tmp8, [mb,#8]               ;// store row 2 col 2
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb],#1               ;// store row 1 col 2


    SUBS    count, count, #2<<28        ;// loop_x -= 2; borrow ends the row
    BCS     loop1_x

    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1) << 20

    ;// Step both pointers two rows down and back to the first column.
    ;// ADDS leaves C clear (the address does not wrap), so each SBC
    ;// subtracts (chromaPartWidth-1) + 1 = chromaPartWidth. The plain ADD
    ;// in between does not touch the flags.
    ADDS    mb, mb, #16
    SBC     mb, mb, tmp2, LSR #20       ;// mb += 2*8 - chromaPartWidth
    ADD     ptrA, ptrA, width, LSL #1
    SBC     ptrA, ptrA, tmp2, LSR #20   ;// ptrA += 2*width - chromaPartWidth

    ;// loop_x is 0xF after the borrow above. Adding 0xE<<24 wraps the top
    ;// byte: loop_x becomes 0 and loop_y drops by 2. Once the rows are
    ;// exhausted there is no wrap and the result stays negative.
    ADDS    count, count, #0xE << 24
    BGE     loop1_y

    ;///////////////////////////////////////////////////////////////////////////
    ;// Cr
    ;///////////////////////////////////////////////////////////////////////////
    ;// Reload the (possibly rewritten) arguments; the Cr source starts
    ;// 'height' rows below the Cb source.
    LDR     height, [sp,#0xfc]          ;// height
    LDR     ref, [sp, #0xc4]            ;// ref
    LDR     tmp1, [sp, #0xd0]           ;// y0
    LDR     tmp2, [sp, #0xcc]           ;// x0
    LDR     mb, [sp, #0xc8]             ;// predPartChroma

    ADD     tmp1, height, tmp1          ;// tmp1 = height + y0
    MLA     tmp3, tmp1, width, tmp2     ;// tmp3 = (height+y0)*width + x0
    ADD     ptrA, ref, tmp3
    ADD     mb, mb, #64                 ;// Cr output follows 64 bytes of Cb

    ;// Rebuild the loop counters from the preserved size fields.
    AND     count, count, #0x00FFFFFF   ;// clear loop_x / loop_y
    AND     tmp1, count, #0x000F0000    ;// (chromaPartHeight-1) << 16
    ADD     count, count, tmp1, LSL #8  ;// loop_y
    AND     tmp2, count, #0x00F00000    ;// loop_x

    ;// 2x2 pels per iteration
    ;// bilinear vertical interpolation
loop2_y
    ADD     count, count, tmp2, LSL #8  ;// reload loop_x = chromaPartWidth-1
loop2_x
    ;// Process 2x2 block
    LDRB    tmp2, [ptrA,width]          ;// 2 row, 1 col
    LDRB    tmp3, [ptrA,width, LSL #1]  ;// 3 row, 1 col
    LDRB    tmp1, [ptrA],#1             ;// 1 row, 1 col

    LDRB    tmp5, [ptrA,width]          ;// 2 row, 2 col
    LDRB    tmp6, [ptrA,width, LSL #1]  ;// 3 row, 2 col
    LDRB    tmp4, [ptrA],#1             ;// 1 row, 2 col

    PKHBT   tmp1, tmp1, tmp2, LSL #16   ;// |B|A|
    PKHBT   tmp2, tmp2, tmp3, LSL #16   ;// |C|B|
    PKHBT   tmp4, tmp4, tmp5, LSL #16   ;// |B|A|

    SMLAD   tmp7, tmp2, valY, c32       ;// multiply
    PKHBT   tmp5, tmp5, tmp6, LSL #16   ;// |C|B|
    SMLAD   tmp2, tmp1, valY, c32       ;// multiply
    SMLAD   tmp8, tmp5, valY, c32       ;// multiply
    SMLAD   tmp5, tmp4, valY, c32       ;// multiply

    MOV     tmp7, tmp7, LSR #6          ;// scale down
    STRB    tmp7, [mb,#8]               ;// store row 2 col 1
    MOV     tmp2, tmp2, LSR #6          ;// scale down
    STRB    tmp2, [mb],#1               ;// store row 1 col 1

    MOV     tmp8, tmp8, LSR #6          ;// scale down
    STRB    tmp8, [mb,#8]               ;// store row 2 col 2
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb],#1               ;// store row 1 col 2


    SUBS    count, count, #2<<28        ;// loop_x -= 2; borrow ends the row
    BCS     loop2_x

    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1) << 20

    ;// Same pointer rewind and row-counter update as in the Cb loop.
    ADDS    mb, mb, #16
    SBC     mb, mb, tmp2, LSR #20       ;// mb += 2*8 - chromaPartWidth
    ADD     ptrA, ptrA, width, LSL #1
    SBC     ptrA, ptrA, tmp2, LSR #20   ;// ptrA += 2*width - chromaPartWidth

    ADDS    count, count, #0xE << 24
    BGE     loop2_y

    ;// Drop the locals plus the four saved argument words (0xc4 + 0x10),
    ;// then restore the callee-saved registers and return via saved lr.
    ADD     sp,sp,#0xd4
    LDMFD   sp!, {r4-r11,pc}

    END