1/* libs/pixelflinger/t32cb16blend.S
2**
3** Copyright 2010, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
/*
 * DBG is placed at the start of every instrumentation-only line.
 * In a DEBUG build it expands to nothing, so the line is assembled.
 * Otherwise it expands to the assembler comment character and the
 * whole line is ignored.
 */
#ifndef DEBUG
#define DBG #
#else
#define DBG
#endif
23
/*
 * blend one of 2 16bpp RGB pixels held in dreg selected by shift
 * with the 32bpp ABGR pixel held in src and store the result in fb
 *
 * Assumes that the dreg data is little endian and that
 * the second pixel (shift==16) will be merged into
 * the fb result
 *
 * Uses $t0,$t6,$t7,$t8
 */
34
35#if __mips==32 && __mips_isa_rev>=2
36    .macro pixel dreg src fb shift
37    /*
38     * sA = s >> 24
39     * f = 0x100 - (sA + (sA>>7))
40     */
41DBG .set    noat
42DBG rdhwr   $at,$2
43DBG .set    at
44
45    srl  $t7,\src,24
46    srl  $t6,$t7,7
47    addu $t7,$t6
48    li   $t6,0x100
49    subu $t7,$t6,$t7
50
51    /* red */
52    ext  $t8,\dreg,\shift+6+5,5         # dst[\shift:15..11]
53    mul  $t6,$t8,$t7
54    ext  $t0,\dreg,\shift+5,6           # start green extraction dst[\shift:10..5]
55    ext  $t8,\src,3,5               # src[7..3]
56    srl  $t6,8
57    addu $t8,$t6
58.if \shift!=0
59    sll  $t8,\shift+11
60    or   \fb,$t8
61.else
62    sll  \fb,$t8,11
63.endif
64
65    /* green */
66    mul  $t8,$t0,$t7
67    ext  $t0,\dreg,\shift,5         # start blue extraction dst[\shift:4..0]
68    ext  $t6,\src,2+8,6             # src[15..10]
69    srl  $t8,8
70    addu $t8,$t6
71
72    /* blue */
73    mul  $t0,$t0,$t7
74    sll  $t8, $t8, \shift+5
75    or   \fb, \fb, $t8
76    ext  $t6,\src,(3+8+8),5
77    srl  $t8,$t0,8
78    addu $t8,$t6
79    sll  $t8, $t8, \shift
80    or   \fb, \fb, $t8
81
82DBG .set    noat
83DBG rdhwr $t8,$2
84DBG subu  $t8,$at
85DBG sltu  $at,$t8,$v0
86DBG movn  $v0,$t8,$at
87DBG sgtu  $at,$t8,$v1
88DBG movn  $v1,$t8,$at
89DBG .set    at
90    .endm
91
92#else
93
94    .macro pixel dreg src fb shift
95    /*
96     * sA = s >> 24
97     * f = 0x100 - (sA + (sA>>7))
98     */
99DBG .set    push
100DBG .set    noat
101DBG .set    mips32r2
102DBG rdhwr   $at,$2
103DBG .set    pop
104
105    srl  $t7,\src,24
106    srl  $t6,$t7,7
107    addu $t7,$t6
108    li   $t6,0x100
109    subu $t7,$t6,$t7
110
111    /*
112     * red
113     * dR = (d >> (6 + 5)) & 0x1f;
114     * dR = (f*dR)>>8
115     * sR = (s >> (   3)) & 0x1f;
116     * sR += dR
117     * fb |= sR << 11
118     */
119    srl  $t8,\dreg,\shift+6+5
120.if \shift==0
121    and  $t8,0x1f
122.endif
123    mul  $t8,$t8,$t7
124    srl  $t6,\src,3
125    and  $t6,0x1f
126    srl  $t8,8
127    addu $t8,$t6
128.if \shift!=0
129    sll  $t8,\shift+11
130    or   \fb,$t8
131.else
132    sll  \fb,$t8,11
133.endif
134
135        /*
136     * green
137     * dG = (d >> 5) & 0x3f
138     * dG = (f*dG) >> 8
139     * sG = (s >> ( 8+2))&0x3F;
140     */
141    srl  $t8,\dreg,\shift+5
142    and  $t8,0x3f
143    mul  $t8,$t8,$t7
144    srl  $t6,\src,8+2
145    and  $t6,0x3f
146    srl  $t8,8
147    addu $t8,$t6
148    sll  $t8,\shift + 5
149    or   \fb,$t8
150
151    /* blue */
152.if \shift!=0
153    srl  $t8,\dreg,\shift
154    and  $t8,0x1f
155.else
156    and  $t8,\dreg,0x1f
157.endif
158    mul  $t8,$t8,$t7
159    srl  $t6,\src,(8+8+3)
160    and  $t6,0x1f
161    srl  $t8,8
162    addu $t8,$t6
163.if \shift!=0
164    sll  $t8,\shift
165.endif
166    or   \fb,$t8
167DBG .set    push
168DBG .set    noat
169DBG .set    mips32r2
170DBG rdhwr   $t8,$2
171DBG subu    $t8,$at
172DBG sltu    $at,$t8,$v0
173DBG movn    $v0,$t8,$at
174DBG sgtu    $at,$t8,$v1
175DBG movn    $v1,$t8,$at
176DBG .set    pop
177    .endm
178#endif
179
180    .text
181    .align
182
183    .global scanline_t32cb16blend_mips
184    .ent    scanline_t32cb16blend_mips
185scanline_t32cb16blend_mips:
186DBG li    $v0,0xffffffff
187DBG li    $v1,0
188    /* Align the destination if necessary */
189    and   $t0,$a0,3
190    beqz  $t0,aligned
191
192    /* as long as there is at least one pixel */
193    beqz  $a2,done
194
195    lw    $t4,($a1)
196    addu  $a0,2
197    addu  $a1,4
198    beqz  $t4,1f
199    lhu   $t3,-2($a0)
200    pixel $t3,$t4,$t1,0
201    sh    $t1,-2($a0)
2021:  subu  $a2,1
203
204aligned:
205    /* Check to see if its worth unrolling the loop */
206    subu  $a2,4
207    bltz  $a2,tail
208
209    /* Process 4 pixels at a time */
210fourpixels:
211    /* 1st pair of pixels */
212    lw    $t4,0($a1)
213    lw    $t5,4($a1)
214    addu  $a0,8
215    addu  $a1,16
216
217    /* both are zero, skip this pair */
218    or    $t3,$t4,$t5
219    beqz  $t3,1f
220
221    /* load the destination */
222    lw    $t3,-8($a0)
223
224    pixel $t3,$t4,$t1,0
225    andi  $t1, 0xFFFF
226    pixel $t3,$t5,$t1,16
227    sw    $t1,-8($a0)
228
2291:
230    /* 2nd pair of pixels */
231    lw    $t4,-8($a1)
232    lw    $t5,-4($a1)
233
234    /* both are zero, skip this pair */
235    or    $t3,$t4,$t5
236    beqz  $t3,1f
237
238    /* load the destination */
239    lw    $t3,-4($a0)
240
241    pixel $t3,$t4,$t1,0
242    andi  $t1, 0xFFFF
243    pixel $t3,$t5,$t1,16
244    sw    $t1,-4($a0)
245
2461:  subu  $a2,4
247    bgtz  $a2,fourpixels
248
249tail:
250    /* the pixel count underran, restore it now */
251    addu  $a2,4
252
253    /* handle the last 0..3 pixels */
254    beqz  $a2,done
255onepixel:
256    lw    $t4,($a1)
257    addu  $a0,2
258    addu  $a1,4
259    beqz  $t4,1f
260    lhu   $t3,-2($a0)
261    pixel $t3,$t4,$t1,0
262    sh    $t1,-2($a0)
2631:  subu  $a2,1
264    bnez  $a2,onepixel
265done:
266DBG .set    push
267DBG .set    mips32r2
268DBG rdhwr   $a0,$3
269DBG mul     $v0,$a0
270DBG mul     $v1,$a0
271DBG .set    pop
272    j     $ra
273    .end    scanline_t32cb16blend_mips
274