; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHor function
;--
;-------------------------------------------------------------------------------


    IF  :DEF: H264DEC_WINASM
        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE


;// h264bsdInterpolateChromaHor register allocation
;//
;// Several names alias the same physical register. Each pair is only safe
;// because the first meaning is dead before the second one is written:
;//   r0  ref    / ptrA   r1  mb     / block   r2  x0    / count
;//   r3  y0     / valX   r5  height / tmp7    r6  chrPW / tmp8
;//   r7  chrPH  / tmp1   r14 xFrac  / c32 (lr is used as a scratch register)

ref     RN 0
ptrA    RN 0

mb      RN 1
block   RN 1

x0      RN 2
count   RN 2

y0      RN 3
valX    RN 3

width   RN 4

height  RN 5
tmp7    RN 5

chrPW   RN 6
tmp8    RN 6

tmp1    RN 7
chrPH   RN 7

tmp2    RN 8

tmp3    RN 9

tmp4    RN 10

tmp5    RN 11

tmp6    RN 12

c32     RN 14
xFrac   RN 14

;// Function exports and imports

    IMPORT  h264bsdFillBlock

    EXPORT  h264bsdInterpolateChromaHor

;// Stack frame layout, as offsets from sp after the prologue
;//
;// The prologue pushes r0-r11 and lr (13 words) and then reserves FRAME_SIZE
;// bytes, so the register arguments are found directly above the scratch
;// area and the caller's stacked arguments follow the saved registers.

FRAME_SIZE      EQU 0xc4    ;// scratch area reserved below the pushed registers
BLOCK_OFS       EQU 0x1c    ;// start of the temporary reference block in the frame

;//  Function arguments

ARG_REF         EQU 0xc4    ;//  u8 *ref                (pushed r0)
ARG_PRED        EQU 0xc8    ;//  u8 *predPartChroma     (pushed r1)
ARG_X0          EQU 0xcc    ;//  i32 x0                 (pushed r2)
ARG_Y0          EQU 0xd0    ;//  i32 y0                 (pushed r3)
ARG_WIDTH       EQU 0xf8    ;//  u32 width              (caller stack)
ARG_HEIGHT      EQU 0xfc    ;//  u32 height             (caller stack)
ARG_XFRAC       EQU 0x100   ;//  u32 xFrac              (caller stack)
ARG_CHR_PW      EQU 0x104   ;//  u32 chromaPartWidth    (caller stack)
ARG_CHR_PH      EQU 0x108   ;//  u32 chromaPartHeight   (caller stack)

;//-----------------------------------------------------------------------------
;// h264bsdInterpolateChromaHor
;//
;// Horizontal bilinear interpolation of the Cb plane followed by the Cr plane.
;// Each output pel is computed from two horizontally adjacent reference pels
;// a (left) and b (right) as
;//
;//      out = ((8 - xFrac) * a + xFrac * b + 4) >> 3
;//
;// In:   r0 = ref, r1 = predPartChroma, r2 = x0, r3 = y0,
;//       stack = width, height, xFrac, chromaPartWidth, chromaPartHeight
;// Out:  Cb pels are stored from predPartChroma with 8 bytes between rows,
;//       Cr pels are stored the same way from predPartChroma + 64.
;//       The Cr reference plane is read from ref + width*height.
;// Regs: r4-r11 are saved and restored; r0-r3, r12 and lr are scratch.
;//       The function returns by popping the saved lr into pc.
;//
;// If the required (chromaPartWidth+1) x chromaPartHeight area is not fully
;// inside the reference plane, h264bsdFillBlock is called once per plane to
;// build a temporary copy in the stack frame, and the interpolation then reads
;// from that copy with x0 = y0 = 0.
;//
;// NOTE(review): pels are processed 2x2 per iteration, output rows are 8 bytes
;// apart and the temporary block has 0xa8 bytes, so the code appears to expect
;// even chromaPartWidth/chromaPartHeight of at most 8 - confirm with callers.
;//-----------------------------------------------------------------------------

h264bsdInterpolateChromaHor
    STMFD   sp!, {r0-r11,lr}
    SUB     sp, sp, #FRAME_SIZE

    LDR     chrPW, [sp, #ARG_CHR_PW]    ;// chromaPartWidth
    LDR     width, [sp, #ARG_WIDTH]     ;// width
    CMP     x0, #0
    BLT     do_fill                     ;// x0 < 0 (signed): left of the plane

    ADD     tmp6, x0, chrPW             ;// tmp6 = x0 + chromaPartWidth
    ADD     tmp6, tmp6, #1              ;// tmp6 = x0 + chromaPartWidth + 1
    CMP     tmp6, width                 ;// x0+chromaPartWidth+1 > width
    BHI     do_fill

    CMP     y0, #0
    BLT     do_fill                     ;// y0 < 0 (signed): above the plane
    LDR     chrPH, [sp, #ARG_CHR_PH]    ;// chromaPartHeight
    LDR     height, [sp, #ARG_HEIGHT]   ;// height
    ADD     tmp6, y0, chrPH             ;// tmp6 = y0 + chromaPartHeight
    CMP     tmp6, height
    BLS     skip_fill                   ;// fully inside: read ref directly

do_fill
    ;// Build the temporary block for the Cb plane. r0 (ref), r2 (x0) and
    ;// r3 (y0) still hold the incoming arguments at this point.
    LDR     chrPH, [sp, #ARG_CHR_PH]    ;// chromaPartHeight
    LDR     height, [sp, #ARG_HEIGHT]   ;// height
    ADD     tmp8, chrPW, #1             ;// tmp8 = chromaPartWidth+1 (r6: replaces chrPW)
    MOV     tmp2, tmp8                  ;// tmp2 = chromaPartWidth+1
    ;// Five stacked arguments for h264bsdFillBlock, in ascending register
    ;// order: width, height, chromaPartWidth+1, chromaPartHeight,
    ;// chromaPartWidth+1
    STMIA   sp,{width,height,tmp8,chrPH,tmp2}
    ADD     block, sp, #BLOCK_OFS       ;// block
    BL      h264bsdFillBlock

    ;// Same again for the Cr plane. r0-r3 are reloaded after the call and
    ;// the stacked arguments are written again before the second call.
    LDR     x0, [sp, #ARG_X0]
    LDR     y0, [sp, #ARG_Y0]
    LDR     ref, [sp, #ARG_REF]         ;// ref
    STMIA   sp,{width,height,tmp8,chrPH,tmp2}
    ADD     block, sp, #BLOCK_OFS       ;// block
    MLA     ref, height, width, ref     ;// ref += width * height;
    MLA     block, chrPH, tmp8, block   ;// block + (chromaPH)*(chromaPW+1)
    BL      h264bsdFillBlock

    ;// From here on the temporary block is the reference picture:
    ;// origin (0,0), width = chromaPartWidth+1, height = chromaPartHeight.
    MOV     x0, #0                      ;// x0 = 0
    MOV     y0, #0                      ;// y0 = 0
    STR     x0, [sp, #ARG_X0]
    STR     y0, [sp, #ARG_Y0]
    ADD     ref, sp, #BLOCK_OFS         ;// ref = block
    STR     ref, [sp, #ARG_REF]         ;// ref

    STR     chrPH, [sp, #ARG_HEIGHT]    ;// height
    STR     tmp8, [sp, #ARG_WIDTH]      ;// width
    MOV     width, tmp8
    SUB     chrPW, chrPW, #1            ;// r6 held chromaPartWidth+1: restore chrPW

skip_fill
    MLA     tmp3, y0, width, x0         ;// tmp3 = y0*width+x0
    LDR     xFrac, [sp, #ARG_XFRAC]     ;// xFrac
    ADD     ptrA, ref, tmp3             ;// ptrA = ref + y0*width+x0
    RSB     valX, xFrac, #8             ;// valX = 8-xFrac

    LDR     mb, [sp, #ARG_PRED]         ;// predPartChroma


    ;// pack values to count register
    ;// [31:28] loop_x (chromaPartWidth-1)
    ;// [27:24] loop_y (chromaPartHeight-1)
    ;// [23:20] chromaPartWidth-1
    ;// [19:16] chromaPartHeight-1
    ;// [15:00] nothing

    SUB     tmp2, chrPH, #1             ;// chromaPartHeight-1
    SUB     tmp1, chrPW, #1             ;// chromaPartWidth-1 (r7: replaces chrPH)
    ;// count shares r2 with x0, which is dead after the MLA above. Start the
    ;// packed value from a clean register instead of adding on top of x0, so
    ;// the counter fields do not depend on x0 being smaller than 65536.
    MOV     count, tmp2, LSL #16        ;// chromaPartHeight-1
    ADD     count, count, tmp2, LSL #24 ;// loop_y
    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
    AND     tmp2, count, #0x00F00000    ;// loop_x
    PKHBT   valX, valX, xFrac, LSL #16  ;// |xFrac|valX |
    MOV     valX, valX, LSL #3          ;// multiply by 8 in advance
    MOV     c32, #32                    ;// rounding term (r14: replaces xFrac)

    ;// With both weights pre-multiplied by 8, each SMLAD below yields
    ;// 8*((8-xFrac)*a + xFrac*b) + 32, and the following LSR #6 gives
    ;// ((8-xFrac)*a + xFrac*b + 4) >> 3.


    ;///////////////////////////////////////////////////////////////////////////
    ;// Cb
    ;///////////////////////////////////////////////////////////////////////////

    ;// 2x2 pels per iteration
    ;// bilinear horizontal interpolation

loop1_y
    ADD     count, count, tmp2, LSL #8  ;// reload loop_x = chromaPartWidth-1
    LDRB    tmp1, [ptrA, width]         ;// row 2, first pel
    LDRB    tmp2, [ptrA], #1            ;// row 1, first pel

loop1_x
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp4, [ptrA], #1

    PKHBT   tmp5, tmp1, tmp3, LSL #16   ;// row 2: |right|left|
    PKHBT   tmp6, tmp2, tmp4, LSL #16   ;// row 1: |right|left|

    LDRB    tmp1, [ptrA, width]
    LDRB    tmp2, [ptrA], #1

    SMLAD   tmp5, tmp5, valX, c32       ;// multiply
    SMLAD   tmp6, tmp6, valX, c32       ;// multiply

    PKHBT   tmp7, tmp3, tmp1, LSL #16
    PKHBT   tmp8, tmp4, tmp2, LSL #16

    SMLAD   tmp7, tmp7, valX, c32       ;// multiply
    SMLAD   tmp8, tmp8, valX, c32       ;// multiply

    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb,#8]               ;// store row 2 col 1

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb],#1               ;// store row 1 col 1

    MOV     tmp7, tmp7, LSR #6          ;// scale down
    STRB    tmp7, [mb,#8]               ;// store row 2 col 2

    MOV     tmp8, tmp8, LSR #6          ;// scale down
    STRB    tmp8, [mb],#1               ;// store row 1 col 2

    SUBS    count, count, #2<<28        ;// loop_x -= 2
    BCS     loop1_x                     ;// continue until the field borrows

    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1) << 20

    ;// ADDS leaves the carry clear (assuming mb+16 does not wrap), so each
    ;// SBC below subtracts (chromaPartWidth-1) + 1 = chromaPartWidth.
    ADDS    mb, mb, #16
    SBC     mb, mb, tmp2, LSR #20       ;// mb: back to column 0, two rows down
    ADD     ptrA, ptrA, width, LSL #1
    SBC     ptrA, ptrA, tmp2, LSR #20
    SUB     ptrA, ptrA, #1              ;// ptrA: back to column 0, two rows down

    ;// Adding 0xE to the top byte clears the 0xF left in loop_x by the borrow
    ;// above and lowers loop_y by 2; the result turns negative when loop_y
    ;// is exhausted.
    ADDS    count, count, #0xE << 24
    BGE     loop1_y

    ;///////////////////////////////////////////////////////////////////////////
    ;// Cr
    ;///////////////////////////////////////////////////////////////////////////
    LDR     height, [sp, #ARG_HEIGHT]   ;// height
    LDR     ref, [sp, #ARG_REF]         ;// ref
    LDR     tmp1, [sp, #ARG_Y0]         ;// y0
    LDR     tmp2, [sp, #ARG_X0]         ;// x0
    LDR     mb, [sp, #ARG_PRED]         ;// predPartChroma

    ADD     tmp1, height, tmp1          ;// skip the Cb plane: height + y0
    MLA     tmp3, tmp1, width, tmp2     ;// tmp3 = (height+y0)*width + x0
    ADD     ptrA, ref, tmp3
    ADD     mb, mb, #64                 ;// Cr output starts 64 bytes after Cb

    AND     count, count, #0x00FFFFFF   ;// drop the used-up loop counters
    AND     tmp1, count, #0x000F0000    ;// (chromaPartHeight-1) << 16
    ADD     count, count, tmp1, LSL #8  ;// reload loop_y
    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1) << 20

    ;// 2x2 pels per iteration
    ;// bilinear horizontal interpolation
loop2_y
    ADD     count, count, tmp2, LSL #8  ;// reload loop_x = chromaPartWidth-1
    LDRB    tmp1, [ptrA, width]         ;// row 2, first pel
    LDRB    tmp2, [ptrA], #1            ;// row 1, first pel

loop2_x
    LDRB    tmp3, [ptrA, width]
    LDRB    tmp4, [ptrA], #1

    PKHBT   tmp5, tmp1, tmp3, LSL #16   ;// row 2: |right|left|
    PKHBT   tmp6, tmp2, tmp4, LSL #16   ;// row 1: |right|left|

    LDRB    tmp1, [ptrA, width]
    LDRB    tmp2, [ptrA], #1

    SMLAD   tmp5, tmp5, valX, c32       ;// multiply
    SMLAD   tmp6, tmp6, valX, c32       ;// multiply

    PKHBT   tmp7, tmp3, tmp1, LSL #16
    PKHBT   tmp8, tmp4, tmp2, LSL #16

    SMLAD   tmp7, tmp7, valX, c32       ;// multiply
    SMLAD   tmp8, tmp8, valX, c32       ;// multiply

    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb,#8]               ;// store row 2 col 1

    MOV     tmp6, tmp6, LSR #6          ;// scale down
    STRB    tmp6, [mb],#1               ;// store row 1 col 1

    MOV     tmp7, tmp7, LSR #6          ;// scale down
    STRB    tmp7, [mb,#8]               ;// store row 2 col 2

    MOV     tmp8, tmp8, LSR #6          ;// scale down
    STRB    tmp8, [mb],#1               ;// store row 1 col 2

    SUBS    count, count, #2<<28        ;// loop_x -= 2
    BCS     loop2_x                     ;// continue until the field borrows

    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1) << 20

    ;// Same pointer rewind as in the Cb loop: carry is clear after ADDS, so
    ;// each SBC subtracts chromaPartWidth.
    ADDS    mb, mb, #16
    SBC     mb, mb, tmp2, LSR #20
    ADD     ptrA, ptrA, width, LSL #1
    SBC     ptrA, ptrA, tmp2, LSR #20
    SUB     ptrA, ptrA, #1

    ADDS    count, count, #0xE << 24    ;// loop_y -= 2, clear loop_x residue
    BGE     loop2_y

    ADD     sp, sp, #(FRAME_SIZE + 16)  ;// drop the frame and the pushed r0-r3
    LDMFD   sp!, {r4-r11,pc}

    END