/*
 * Copyright © 2008 Mozilla Corporation
 * Copyright © 2010 Nokia Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Mozilla Corporation not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Mozilla Corporation makes no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Jeff Muizelaar (jeff@infidigm.net)
 *
 */
271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/*
 * Assembler setup for everything below:
 *  - accept ARMv6 instructions, but record armv4 as the architecture in the
 *    object file attributes (.object_arch overrides what .arch would record)
 *  - ARM (not Thumb) instruction encoding
 *  - alternate macro syntax: the macros in this file reference their
 *    parameters by bare name and glue suffixes on with '&'
 *  - 4-byte (1 << 2) alignment for the code that follows
 */
	.text
	.arch armv6
	.object_arch armv4
	.arm
	.altmacro
	.p2align 2
391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Supplementary macro for setting function attributes.
 *
 * Opens the definition of the function named by the fname argument:
 *  - starts a debug function scope with .func (the user of this macro has
 *    to close it with a matching .endfunc after the function body)
 *  - exports the symbol with .global
 *  - on ELF targets, additionally gives it hidden visibility and marks
 *    the symbol as a function
 *  - finally emits the entry label itself
 *
 * Relies on .altmacro being in effect, because the parameter is referenced
 * by its bare name.
 */
.macro pixman_asm_function fname
	.func fname
	.global fname
#ifdef __ELF__
	.hidden fname
	.type fname, %function
#endif
fname:
.endm
501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Note: This code is only using armv5te instructions (not even armv6),
 *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
 *       be split into a few variants, tuned for each microarchitecture.
 *
 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
 * have efficient write combining), it needs to be changed to use 16-byte
 * aligned writes using STM instruction.
 *
 * Nearest scanline scaler macro template uses the following arguments:
 *  fname                     - name of the function to generate
 *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
 *  t                         - type suffix for LDR/STR instructions
 *  prefetch_distance         - prefetch in the source image by that many
 *                              pixels ahead
 *  prefetch_braking_distance - stop prefetching when that many pixels are
 *                              remaining before the end of scanline
 *
 * Interface of each generated function, as read by the code below:
 *  r0      (W)               - number of destination pixels to produce
 *  r1      (DST)             - destination pointer, advanced by one pixel
 *                              after every store
 *  r2      (SRC)             - source base pointer; every load is done at
 *                              SRC + byte offset derived from VX
 *  r3      (VX)              - source x position with 16 fractional bits;
 *                              the pixel index is VX >> 16
 *  [sp]    (UNIT_X)          - first stack argument, added to VX for each
 *                              destination pixel
 *  [sp,#4] (SRC_WIDTH_FIXED) - second stack argument; after every
 *                              VX += UNIT_X it is subtracted from VX
 *                              repeatedly for as long as VX is non-negative
 *
 * NOTE(review): the "subtract while non-negative" step means VX is always
 * negative when it is turned into a load offset, so the caller presumably
 * passes SRC and VX biased accordingly - confirm against the C caller.
 *
 * r4-r8 and r10 are saved and restored; r0, r1, r3, ip and the condition
 * flags are modified.
 */

.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
                                      prefetch_distance,        \
                                      prefetch_braking_distance

pixman_asm_function fname
	/* symbolic register names, released again with .unreq below */
	W		.req	r0
	DST		.req	r1
	SRC		.req	r2
	VX		.req	r3
	UNIT_X		.req	ip
	TMP1		.req	r4
	TMP2		.req	r5
	VXMASK		.req	r6
	PF_OFFS		.req	r7
	SRC_WIDTH_FIXED	.req	r8

	/* first stack argument, fetched before the push moves sp */
	ldr	UNIT_X, [sp]
	/*
	 * NOTE(review): r10 is not used by this code; it is presumably
	 * pushed only to keep the stack adjustment a multiple of 8 bytes.
	 */
	push	{r4, r5, r6, r7, r8, r10}
	/* VXMASK = ~(pixel size - 1): rounds a byte offset down to a pixel */
	mvn	VXMASK, #((1 << bpp_shift) - 1)
	/* 6 registers pushed = 24 bytes, so this is the caller's [sp, #4] */
	ldr	SRC_WIDTH_FIXED, [sp, #28]

	/*
	 * define helper macro
	 *
	 * Copies two pixels from SRC to DST.
	 * On entry TMP1 holds the byte offset of the next source pixel; on
	 * exit it holds the byte offset of the pixel following the two that
	 * were copied, so invocations can be chained back to back. While one
	 * pixel is being loaded and stored, the offset of the next one is
	 * computed into the other temporary register and VX is advanced.
	 */
	.macro	scale_2_pixels
		ldr&t	TMP1, [SRC, TMP1]
		/* byte offset of the next pixel: (VX >> 16) << bpp_shift */
		and	TMP2, VXMASK, VX, asr #(16 - bpp_shift)
		adds	VX, VX, UNIT_X
		str&t	TMP1, [DST], #(1 << bpp_shift)
		/* while VX >= 0: VX -= SRC_WIDTH_FIXED */
9:		subpls	VX, VX, SRC_WIDTH_FIXED
		bpl	9b

		ldr&t	TMP2, [SRC, TMP2]
		and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
		adds	VX, VX, UNIT_X
		str&t	TMP2, [DST], #(1 << bpp_shift)
		/* while VX >= 0: VX -= SRC_WIDTH_FIXED */
9:		subpls	VX, VX, SRC_WIDTH_FIXED
		bpl	9b
	.endm

	/* now do the scaling */

	/* establish the scale_2_pixels entry condition for the first pixel */
	and	TMP1, VXMASK, VX, asr #(16 - bpp_shift)
	adds	VX, VX, UNIT_X
9:	subpls	VX, VX, SRC_WIDTH_FIXED
	bpl	9b
	/*
	 * The prefetching loop is only entered while at least
	 * (8 + prefetch_braking_distance) pixels are left.
	 */
	subs	W, W, #(8 + prefetch_braking_distance)
	blt	2f
	/* calculate prefetch offset */
	/* PF_OFFS = VX + UNIT_X * prefetch_distance */
	mov	PF_OFFS, #prefetch_distance
	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
1:	/* main loop, process 8 pixels per iteration with prefetch */
	pld	[SRC, PF_OFFS, asr #(16 - bpp_shift)]
	/* advance the prefetch position by 8 pixels worth of UNIT_X */
	add	PF_OFFS, UNIT_X, lsl #3
	scale_2_pixels
	scale_2_pixels
	scale_2_pixels
	scale_2_pixels
	subs	W, W, #8
	bge	1b
2:
	/*
	 * Undo the bias applied before the main loop and apply a bias of 4
	 * instead: afterwards W = (pixels still to do) - 4.
	 */
	subs	W, W, #(4 - 8 - prefetch_braking_distance)
	blt	2f
1:	/* process the remaining pixels */
	scale_2_pixels
	scale_2_pixels
	subs	W, W, #4
	bge	1b
2:
	/*
	 * Here W = (pixels still to do) - 4 with 0..3 pixels left, so the
	 * two lowest bits of W are equal to the two lowest bits of the
	 * remaining pixel count.
	 */
	tst	W, #2
	beq	2f
	scale_2_pixels
2:
	/* last odd pixel: TMP1 already holds its source byte offset */
	tst	W, #1
	ldrne&t	TMP1, [SRC, TMP1]
	strne&t	TMP1, [DST]
	/* cleanup helper macro */
	.purgem	scale_2_pixels
	.unreq	DST
	.unreq	SRC
	.unreq	W
	.unreq	VX
	.unreq	UNIT_X
	.unreq	TMP1
	.unreq	TMP2
	.unreq	VXMASK
	.unreq	PF_OFFS
	.unreq	SRC_WIDTH_FIXED
	/* return */
	pop	{r4, r5, r6, r7, r8, r10}
	bx	lr
.endfunc
.endm
1601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Instantiate the scaler template. Argument order:
 *   fname, bpp_shift, t, prefetch_distance, prefetch_braking_distance
 */

/* bpp_shift = 1: 2-byte pixels, 'h' suffix selects halfword loads/stores */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32

/* bpp_shift = 2: 4-byte pixels, empty suffix selects word loads/stores */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32
166