1/* libs/pixelflinger/t32cb16blend.S
2**
3** Copyright 2010, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
/*
 * DBG prefixes the rdhwr based timing lines in this file.
 * With DEBUG defined it expands to nothing, so those lines are assembled.
 * Otherwise it expands to the assembler comment character, which turns
 * every DBG-prefixed line into a comment.
 */
#ifndef DEBUG
#define DBG #
#else
#define DBG
#endif
23
24/*
25 * blend one of 2 16bpp RGB pixels held in dreg selected by shift
26 * with the 32bpp ABGR pixel held in src and store the result in fb
27 *
 * Assumes that the dreg data is little endian and that
 * the second pixel (shift==16) will be merged into
 * the fb result
31 *
32 * Uses $t0,$t6,$t7,$t8
33 */
34
35#if __mips==32 && __mips_isa_rev>=2
36	.macro pixel dreg src fb shift
37	/*
38	 * sA = s >> 24
39	 * f = 0x100 - (sA + (sA>>7))
40	 */
41DBG	.set	noat
42DBG	rdhwr	$at,$2
43DBG	.set	at
44
45	srl	$t7,\src,24
46	srl	$t6,$t7,7
47	addu	$t7,$t6
48	li	$t6,0x100
49	subu	$t7,$t6,$t7
50
51	/* red */
52	ext	$t8,\dreg,\shift+6+5,5			# dst[\shift:15..11]
53	mul	$t6,$t8,$t7
54	ext	$t0,\dreg,\shift+5,6			# start green extraction dst[\shift:10..5]
55	ext	$t8,\src,3,5				# src[7..3]
56	srl	$t6,8
57	addu	$t8,$t6
58	ins	\fb,$t8,\shift+6+5,5			# dst[\shift:15..11]
59
60        /* green */
61	mul	$t8,$t0,$t7
62	ext	$t0,\dreg,\shift,5			# start blue extraction dst[\shift:4..0]
63	ext	$t6,\src,2+8,6				# src[15..10]
64	srl	$t8,8
65        addu	$t8,$t6
66
67	/* blue */
68	mul	$t0,$t0,$t7
69	ins	\fb,$t8,\shift+5,6			# finish green insertion dst[\shift:10..5]
70	ext	$t6,\src,(3+8+8),5
71	srl	$t8,$t0,8
72	addu	$t8,$t6
73	ins	\fb,$t8,\shift,5
74
75DBG	.set	noat
76DBG	rdhwr	$t8,$2
77DBG	subu	$t8,$at
78DBG	sltu	$at,$t8,$v0
79DBG	movn	$v0,$t8,$at
80DBG	sgtu	$at,$t8,$v1
81DBG	movn	$v1,$t8,$at
82DBG	.set	at
83	.endm
84
85#else
86
87	.macro pixel dreg src fb shift
88	/*
89	 * sA = s >> 24
90	 * f = 0x100 - (sA + (sA>>7))
91	 */
92DBG	.set	push
93DBG	.set	noat
94DBG	.set	mips32r2
95DBG 	rdhwr	$at,$2
96DBG	.set	pop
97
98	srl	$t7,\src,24
99	srl	$t6,$t7,7
100	addu	$t7,$t6
101	li	$t6,0x100
102	subu	$t7,$t6,$t7
103
104	/*
105	 * red
106	 * dR = (d >> (6 + 5)) & 0x1f;
107	 * dR = (f*dR)>>8
108	 * sR = (s >> (   3)) & 0x1f;
109	 * sR += dR
110	 * fb |= sR << 11
111	 */
112	srl	$t8,\dreg,\shift+6+5
113.if \shift==0
114	and     $t8,0x1f
115.endif
116	mul	$t8,$t8,$t7
117	srl	$t6,\src,3
118	and	$t6,0x1f
119	srl	$t8,8
120	addu	$t8,$t6
121.if \shift!=0
122	sll	$t8,\shift+11
123	or	\fb,$t8
124.else
125	sll	\fb,$t8,11
126.endif
127
128        /*
129	 * green
130	 * dG = (d >> 5) & 0x3f
131	 * dG = (f*dG) >> 8
132	 * sG = (s >> ( 8+2))&0x3F;
133	 */
134	srl	$t8,\dreg,\shift+5
135        and	$t8,0x3f
136	mul	$t8,$t8,$t7
137        srl	$t6,\src,8+2
138        and     $t6,0x3f
139	srl	$t8,8
140        addu	$t8,$t6
141	sll	$t8,\shift + 5
142	or	\fb,$t8
143
144	/* blue */
145.if \shift!=0
146	srl	$t8,\dreg,\shift
147	and	$t8,0x1f
148.else
149	and	$t8,\dreg,0x1f
150.endif
151	mul	$t8,$t8,$t7
152	srl	$t6,\src,(8+8+3)
153	and	$t6,0x1f
154	srl	$t8,8
155	addu	$t8,$t6
156.if \shift!=0
157	sll	$t8,\shift
158.endif
159	or	\fb,$t8
160DBG	.set	push
161DBG	.set	noat
162DBG	.set	mips32r2
163DBG	rdhwr	$t8,$2
164DBG	subu	$t8,$at
165DBG	sltu	$at,$t8,$v0
166DBG	movn	$v0,$t8,$at
167DBG	sgtu	$at,$t8,$v1
168DBG	movn	$v1,$t8,$at
169DBG	.set	pop
170	.endm
171#endif
172
173	.text
174	.align
175
176	.global scanline_t32cb16blend_mips
177	.ent	scanline_t32cb16blend_mips
178scanline_t32cb16blend_mips:
179DBG	li	$v0,0xffffffff
180DBG	li	$v1,0
181	/* Align the destination if necessary */
182	and	$t0,$a0,3
183	beqz	$t0,aligned
184
185	/* as long as there is at least one pixel */
186	beqz	$a2,done
187
188	lw	$t4,($a1)
189	addu	$a0,2
190	addu	$a1,4
191	beqz	$t4,1f
192	lhu	$t3,-2($a0)
193	pixel   $t3,$t4,$t1,0
194	sh	$t1,-2($a0)
1951:	subu	$a2,1
196
197aligned:
198	/* Check to see if its worth unrolling the loop */
199	subu	$a2,4
200	bltz	$a2,tail
201
202	/* Process 4 pixels at a time */
203fourpixels:
204	/* 1st pair of pixels */
205	lw	$t4,0($a1)
206	lw	$t5,4($a1)
207	addu	$a0,8
208	addu	$a1,16
209
210	/* both are zero, skip this pair */
211	or	$t3,$t4,$t5
212	beqz	$t3,1f
213
214	/* load the destination */
215	lw	$t3,-8($a0)
216
217	pixel	$t3,$t4,$t1,0
218	pixel	$t3,$t5,$t1,16
219	sw	$t1,-8($a0)
220
2211:
222	/* 2nd pair of pixels */
223	lw	$t4,-8($a1)
224	lw	$t5,-4($a1)
225
226	/* both are zero, skip this pair */
227	or	$t3,$t4,$t5
228	beqz	$t3,1f
229
230	/* load the destination */
231	lw	$t3,-4($a0)
232
233	pixel	$t3,$t4,$t1,0
234	pixel	$t3,$t5,$t1,16
235	sw	$t1,-4($a0)
236
2371:	subu    $a2,4
238	bgtz	$a2,fourpixels
239
240tail:
241	/* the pixel count underran, restore it now */
242	addu	$a2,4
243
244	/* handle the last 0..3 pixels */
245	beqz	$a2,done
246onepixel:
247	lw	$t4,($a1)
248	addu	$a0,2
249	addu	$a1,4
250	beqz	$t4,1f
251	lhu	$t3,-2($a0)
252	pixel   $t3,$t4,$t1,0
253	sh	$t1,-2($a0)
2541:	subu	$a2,1
255	bnez	$a2,onepixel
256done:
257DBG	.set    push
258DBG	.set    mips32r2
259DBG 	rdhwr	$a0,$3
260DBG 	mul	$v0,$a0
261DBG 	mul	$v1,$a0
262DBG	.set    pop
263	j	$ra
264	.end	scanline_t32cb16blend_mips
265