; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;      http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.

;-------------------------------------------------------------------------------
;--
;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaVer function
;--
;-------------------------------------------------------------------------------


;// Build attributes: declare the 8-byte stack alignment contract unless the
;// file is being assembled for the Windows assembler (H264DEC_WINASM).
    IF :DEF: H264DEC_WINASM
        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
    ELSE
        REQUIRE8
        PRESERVE8
    ENDIF

    AREA    |.text|, CODE

;// h264bsdInterpolateChromaVer register allocation
;//
;// Several names are bound to the same register. Names that share a register
;// are used in different phases of the function, so only one of them is
;// meaningful at any given instruction.

ref     RN 0    ;// r0: reference picture pointer (1st argument)
ptrA    RN 0    ;// r0: running source pointer inside the interpolation loops

mb      RN 1    ;// r1: running output pointer (loaded from predPartChroma)
block   RN 1    ;// r1: scratch block pointer passed to h264bsdFillBlock

x0      RN 2    ;// r2: x0 argument
count   RN 2    ;// r2: packed loop counters once x0 has been consumed

y0      RN 3    ;// r3: y0 argument
valY    RN 3    ;// r3: packed interpolation weights once y0 has been consumed

width   RN 4    ;// r4: source row stride in bytes

height  RN 5    ;// r5: source plane height
tmp7    RN 5    ;// r5: scratch inside the loops

chrPW   RN 6    ;// r6: chromaPartWidth
tmp8    RN 6    ;// r6: scratch inside the loops

tmp1    RN 7    ;// r7: scratch

tmp2    RN 8    ;// r8: scratch

tmp3    RN 9    ;// r9: scratch

tmp4    RN 10   ;// r10: scratch

tmp5    RN 11   ;// r11: scratch inside the loops
chrPH   RN 11   ;// r11: chromaPartHeight (before the loops)

tmp6    RN 12   ;// r12: scratch

c32     RN 14   ;// r14 (lr): rounding constant 32 inside the loops
yFrac   RN 14   ;// r14 (lr): yFrac while the weights are being built

;// Function exports and imports

    IMPORT  h264bsdFillBlock

    EXPORT  h264bsdInterpolateChromaVer

;//  Function arguments
;//
;//  The offsets below are relative to sp AFTER the prologue, which pushes
;//  r0-r11 and lr (13 words = 0x34 bytes) and then reserves a further 0xc4
;//  bytes. The first four arguments therefore live in the saved r0-r3 slots
;//  (0xc4..0xd0), the saved r4-r11/lr occupy 0xd4..0xf4, and the arguments
;//  passed on the caller's stack start at 0xf8.
;//
;//  u8 *ref,                   : 0xc4
;//  u8 *predPartChroma,        : 0xc8
;//  i32 x0,                    : 0xcc
;//  i32 y0,                    : 0xd0
;//  u32 width,                 : 0xf8
;//  u32 height,                : 0xfc
;//  u32 yFrac,                 : 0x100
;//  u32 chromaPartWidth,       : 0x104
;//  u32 chromaPartHeight       : 0x108

;//-----------------------------------------------------------------------------
;// h264bsdInterpolateChromaVer
;//
;// Vertical bilinear interpolation of one chroma partition, first for the Cb
;// plane and then for the Cr plane.
;//
;// For every output pel, with A the source pel and B the pel one row below:
;//     out = (8*(8 - yFrac)*A + 8*yFrac*B + 32) >> 6
;//
;// Inputs (see the argument table for stack offsets):
;//     r0 = ref, r1 = predPartChroma, r2 = x0, r3 = y0,
;//     stack = width, height, yFrac, chromaPartWidth, chromaPartHeight
;//
;// Output:
;//     Cb is written at predPartChroma with a row stride of 8 bytes, Cr is
;//     written at predPartChroma + 64. No return value is produced.
;//
;// Source layout:
;//     The Cr plane is read from ref + width*height, i.e. directly after Cb.
;//
;// If the (chromaPartWidth) x (chromaPartHeight+1) source window is not fully
;// inside the plane, h264bsdFillBlock is called once per plane to build the
;// window in a scratch buffer on the stack, and interpolation then reads from
;// that buffer instead (x0 = y0 = 0, width = chromaPartWidth,
;// height = chromaPartHeight+1).
;// NOTE(review): h264bsdFillBlock is external; presumably it copies the window
;// with edge replication - confirm against its implementation.
;//
;// NOTE(review): the loops consume 2x2 pels per iteration, the output stride is
;// 8 and the scratch area between sp+0x1c and sp+0xc4 is 168 bytes, so this
;// assumes even partition dimensions no larger than 8x8 - confirm with callers.
;//
;// Registers r4-r11 are restored on exit; r0-r3, r12 and flags are clobbered.
;//-----------------------------------------------------------------------------
h264bsdInterpolateChromaVer
    STMFD   sp!, {r0-r11,lr}        ;// r0-r3 are saved too so that the
                                    ;// arguments can be reloaded / rewritten
    SUB     sp, sp, #0xc4           ;// FillBlock stack args + scratch block

    LDR     chrPW, [sp, #0x104]     ;// chromaPartWidth
    LDR     width, [sp, #0xf8]      ;// width
    CMP     x0, #0
    BLT     do_fill                 ;// window starts left of the plane

    ADD     tmp1, x0, chrPW         ;// tmp1 = x0+ chromaPartWidth
    CMP     tmp1, width             ;// x0+chromaPartWidth > width
    BHI     do_fill                 ;// window ends right of the plane

    CMP     y0, #0
    BLT     do_fill                 ;// window starts above the plane
    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
    LDR     height, [sp, #0xfc]     ;// height
    ADD     tmp1, y0, chrPH         ;// tmp1 = y0 + chromaPartHeight
    ADD     tmp1, tmp1, #1          ;// tmp1 = y0 + chromaPartHeight + 1
                                    ;// (one extra row is read below the block)
    CMP     tmp1, height
    BLS     skip_fill               ;// window fully inside: read ref directly

do_fill
    ;// chrPH/height are (re)loaded here because the early branches above
    ;// reach this label before those loads have executed.
    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
    LDR     height, [sp, #0xfc]     ;// height
    ADD     tmp1, chrPH, #1         ;// tmp1 = chromaPartHeight+1
    MOV     tmp2, chrPW             ;// tmp2 = chromaPartWidth
    ;// Stack arguments 5..9 of h264bsdFillBlock, in order:
    ;// width, height, chromaPartWidth, chromaPartHeight+1, chromaPartWidth
    STMIA   sp,{width,height,chrPW,tmp1,tmp2}
    ADD     block, sp, #0x1c        ;// block
    BL      h264bsdFillBlock        ;// Cb window; r0=ref, r2=x0, r3=y0

    ;// r0-r3 may have been clobbered by the call: reload them, and rewrite
    ;// the outgoing stack arguments in case the callee modified them.
    LDR     x0, [sp, #0xcc]
    LDR     y0, [sp, #0xd0]
    LDR     ref, [sp, #0xc4]        ;// ref
    STMIA   sp,{width,height,chrPW,tmp1,tmp2}
    ADD     block, sp, #0x1c        ;// block
    MLA     ref, height, width, ref ;// ref += width * height;
    MLA     block, chrPW, tmp1, block;// block + (chromaPW)*(chromaPH+1)
    BL      h264bsdFillBlock        ;// Cr window, stored right after Cb

    ;// From here on the scratch block is the reference picture: overwrite
    ;// the saved arguments so the Cr pass picks up the same values.
    MOV     x0, #0                  ;// x0 = 0
    MOV     y0, #0                  ;// y0 = 0
    STR     x0, [sp, #0xcc]
    STR     y0, [sp, #0xd0]
    ADD     ref, sp, #0x1c          ;// ref = block
    STR     ref, [sp, #0xc4]        ;// ref

    STR     tmp1, [sp, #0xfc]       ;// height
    STR     chrPW, [sp, #0xf8]      ;// width
    MOV     width, chrPW

skip_fill
    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
    LDR     yFrac, [sp, #0x100]     ;// yFrac
    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
    RSB     valY, yFrac, #8         ;// valY = 8-yFrac

    LDR     mb, [sp, #0xc8]         ;// predPartChroma


    ;// pack values to count register
    ;// [31:28] loop_x (chromaPartWidth-1)
    ;// [27:24] loop_y (chromaPartHeight-1)
    ;// [23:20] chromaPartWidth-1
    ;// [19:16] chromaPartHeight-1
    ;// [15:00] nothing

    SUB     tmp2, chrPH, #1             ;// chromaPartHeight-1
    SUB     tmp1, chrPW, #1             ;// chromaPartWidth-1
    ;// count shares r2 with x0, whose last use was the MLA above. Start the
    ;// packed value from a clean register instead of accumulating onto the
    ;// stale x0, so a large x0 can never spill into the counter fields.
    MOV     count, tmp2, LSL #16        ;// chromaPartHeight-1
    ADD     count, count, tmp2, LSL #24 ;// loop_y
    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
    AND     tmp2, count, #0x00F00000    ;// loop_x
    PKHBT   valY, valY, yFrac, LSL #16  ;// |yFrac|valY |
    MOV     valY, valY, LSL #3          ;// multiply by 8 in advance
    MOV     c32, #32                    ;// rounding term; overwrites yFrac (r14)


    ;///////////////////////////////////////////////////////////////////////////
    ;// Cb
    ;///////////////////////////////////////////////////////////////////////////

    ;// 2x2 pels per iteration
    ;// bilinear vertical interpolation

loop1_y
    ADD     count, count, tmp2, LSL #8  ;// reload loop_x = chromaPartWidth-1
loop1_x
    ;// Process 2x2 block
    LDRB    tmp2, [ptrA,width]          ;// 2 row, 1 col
    LDRB    tmp3, [ptrA,width, LSL #1]  ;// 3 row, 1 col
    LDRB    tmp1, [ptrA],#1             ;// 1 row, 1 col

    LDRB    tmp5, [ptrA,width]          ;// 2 row, 2 col
    LDRB    tmp6, [ptrA,width, LSL #1]  ;// 3 row, 2 col
    LDRB    tmp4, [ptrA],#1             ;// 1 row, 2 col

    PKHBT   tmp1, tmp1, tmp2, LSL #16   ;// |B|A|
    PKHBT   tmp2, tmp2, tmp3, LSL #16   ;// |C|B|
    PKHBT   tmp4, tmp4, tmp5, LSL #16   ;// |B|A|

    ;// Each SMLAD computes lo*8*(8-yFrac) + hi*8*yFrac + 32
    SMLAD   tmp7, tmp2, valY, c32       ;// multiply
    PKHBT   tmp5, tmp5, tmp6, LSL #16   ;// |C|B|
    SMLAD   tmp2, tmp1, valY, c32       ;// multiply
    SMLAD   tmp8, tmp5, valY, c32       ;// multiply
    SMLAD   tmp5, tmp4, valY, c32       ;// multiply

    MOV     tmp7, tmp7, LSR #6          ;// scale down
    STRB    tmp7, [mb,#8]               ;// store row 2 col 1
    MOV     tmp2, tmp2, LSR #6          ;// scale down
    STRB    tmp2, [mb],#1               ;// store row 1 col 1

    MOV     tmp8, tmp8, LSR #6          ;// scale down
    STRB    tmp8, [mb,#8]               ;// store row 2 col 2
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb],#1               ;// store row 1 col 2


    SUBS    count, count, #2<<28        ;// loop_x -= 2; borrow ends the row pair
    BCS     loop1_x

    AND     tmp2, count, #0x00F00000    ;// tmp2 = (chromaPartWidth-1) << 20

    ;// ADDS leaves carry clear, so each SBC below subtracts one extra:
    ;// pointer -= (chromaPartWidth-1) + 1. The plain ADD and the SBCs do not
    ;// touch the flags, so both SBCs see the carry produced by this ADDS.
    ADDS    mb, mb, #16                 ;// two output rows down (stride 8) ...
    SBC     mb, mb, tmp2, LSR #20       ;// ... and back to the first column
    ADD     ptrA, ptrA, width, LSL #1   ;// two source rows down ...
    SBC     ptrA, ptrA, tmp2, LSR #20   ;// ... and back to the first column

    ;// After the borrow above bits [31:28] are 0xF, so adding 0xE<<24 acts
    ;// as loop_y -= 2; the result goes negative once the rows are exhausted.
    ADDS    count, count, #0xE << 24
    BGE     loop1_y

    ;///////////////////////////////////////////////////////////////////////////
    ;// Cr
    ;///////////////////////////////////////////////////////////////////////////
    ;// height (r5) and the argument registers were clobbered by the Cb pass,
    ;// so everything is reloaded from the (possibly rewritten) stack slots.
    LDR     height, [sp,#0xfc]          ;// height
    LDR     ref, [sp, #0xc4]            ;// ref
    LDR     tmp1, [sp, #0xd0]           ;// y0
    LDR     tmp2, [sp, #0xcc]           ;// x0
    LDR     mb, [sp, #0xc8]             ;// predPartChroma

    ADD     tmp1, height, tmp1          ;// skip the Cb plane: row = height + y0
    MLA     tmp3, tmp1, width, tmp2     ;// tmp3 = (height+y0)*width + x0
    ADD     ptrA, ref, tmp3             ;// ptrA = first Cr source pel
    ADD     mb, mb, #64                 ;// Cr output follows the Cb block

    ;// Rebuild loop_y / loop_x from the copies kept in bits [23:16]
    AND     count, count, #0x00FFFFFF
    AND     tmp1, count, #0x000F0000    ;// chromaPartHeight-1
    ADD     count, count, tmp1, LSL #8  ;// loop_y
    AND     tmp2, count, #0x00F00000    ;// loop_x

    ;// 2x2 pels per iteration
    ;// bilinear vertical interpolation
loop2_y
    ADD     count, count, tmp2, LSL #8  ;// reload loop_x = chromaPartWidth-1
loop2_x
    ;// Process 2x2 block
    LDRB    tmp2, [ptrA,width]          ;// 2 row, 1 col
    LDRB    tmp3, [ptrA,width, LSL #1]  ;// 3 row, 1 col
    LDRB    tmp1, [ptrA],#1             ;// 1 row, 1 col

    LDRB    tmp5, [ptrA,width]          ;// 2 row, 2 col
    LDRB    tmp6, [ptrA,width, LSL #1]  ;// 3 row, 2 col
    LDRB    tmp4, [ptrA],#1             ;// 1 row, 2 col

    PKHBT   tmp1, tmp1, tmp2, LSL #16   ;// |B|A|
    PKHBT   tmp2, tmp2, tmp3, LSL #16   ;// |C|B|
    PKHBT   tmp4, tmp4, tmp5, LSL #16   ;// |B|A|

    SMLAD   tmp7, tmp2, valY, c32       ;// multiply
    PKHBT   tmp5, tmp5, tmp6, LSL #16   ;// |C|B|
    SMLAD   tmp2, tmp1, valY, c32       ;// multiply
    SMLAD   tmp8, tmp5, valY, c32       ;// multiply
    SMLAD   tmp5, tmp4, valY, c32       ;// multiply

    MOV     tmp7, tmp7, LSR #6          ;// scale down
    STRB    tmp7, [mb,#8]               ;// store row 2 col 1
    MOV     tmp2, tmp2, LSR #6          ;// scale down
    STRB    tmp2, [mb],#1               ;// store row 1 col 1

    MOV     tmp8, tmp8, LSR #6          ;// scale down
    STRB    tmp8, [mb,#8]               ;// store row 2 col 2
    MOV     tmp5, tmp5, LSR #6          ;// scale down
    STRB    tmp5, [mb],#1               ;// store row 1 col 2


    SUBS    count, count, #2<<28        ;// loop_x -= 2; borrow ends the row pair
    BCS     loop2_x

    AND     tmp2, count, #0x00F00000    ;// tmp2 = (chromaPartWidth-1) << 20

    ;// Same carry-clear ADDS / SBC sequence as in the Cb pass
    ADDS    mb, mb, #16
    SBC     mb, mb, tmp2, LSR #20
    ADD     ptrA, ptrA, width, LSL #1
    SBC     ptrA, ptrA, tmp2, LSR #20

    ADDS    count, count, #0xE << 24    ;// loop_y -= 2
    BGE     loop2_y

    ADD     sp,sp,#0xd4                 ;// drop the 0xc4 frame + saved r0-r3
    LDMFD   sp!, {r4-r11,pc}

    END