/*
 * Copyright © 2008 Mozilla Corporation
 * Copyright © 2010 Nokia Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Mozilla Corporation not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Mozilla Corporation makes no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Jeff Muizelaar (jeff@infidigm.net)
 *
 */

/*
 * Syntax/target: GNU as, 32-bit ARM state (.arm), run through the C
 * preprocessor (the #if/#ifdef lines below require it).
 */

/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

    .text
    .arch armv6
    .object_arch armv4
    .arm
    .altmacro
    .p2align 2

/*
 * Supplementary macro for setting function attributes.
 *
 * Opens a debug function scope (.func), exports the symbol, marks it
 * hidden and typed as a function on ELF targets, and finally emits the
 * entry label.  The matching .endfunc must be emitted by the user of
 * this macro.
 */
.macro pixman_asm_function fname
    .func fname
    .global fname
#ifdef __ELF__
    .hidden fname
    .type fname, %function
#endif
fname:
.endm

/*
 * Note: This code is only using armv5te instructions (not even armv6),
 *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
 *       be split into a few variants, tuned for each microarchitecture.
 *
 * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
 * have efficient write combining), it needs to be changed to use 16-byte
 * aligned writes using STM instruction.
 *
 * Nearest scanline scaler macro template uses the following arguments:
 *  fname                     - name of the function to generate
 *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
 *  t                         - type suffix for LDR/STR instructions
 *  prefetch_distance         - prefetch in the source image by that many
 *                              pixels ahead
 *  prefetch_braking_distance - stop prefetching when that many pixels are
 *                              remaining before the end of scanline
 *
 * Register and stack usage of every generated function, as implied by
 * the aliases and loads below (NOTE(review): the C prototype is not
 * visible in this file, confirm against the caller):
 *  r0            W               - number of pixels left to write
 *  r1            DST             - destination pointer, post-incremented
 *  r2            SRC             - source base pointer
 *  r3            VX              - fixed-point source position; the byte
 *                                  offset used for loads is
 *                                  (VX >> 16) << bpp_shift
 *  [sp] at entry UNIT_X (ip)     - fixed-point step added to VX per pixel
 *  [sp, #4] at entry
 *                SRC_WIDTH_FIXED - subtracted from VX while VX is
 *                                  non-negative (read as [sp, #28] after
 *                                  the 24-byte push)
 *  r4-r8         scratch, saved and restored
 *  r10           saved and restored but otherwise unused here;
 *                presumably pushed only to keep the stack adjustment a
 *                multiple of 8 bytes
 *
 * VX is kept negative between pixels: each time the step makes it
 * non-negative, SRC_WIDTH_FIXED is subtracted until it is negative
 * again.  NOTE(review): this loop would not terminate for a
 * non-positive SRC_WIDTH_FIXED, so callers presumably guarantee it is
 * greater than zero.
 */

.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
                                      prefetch_distance,        \
                                      prefetch_braking_distance

pixman_asm_function fname
    W               .req    r0
    DST             .req    r1
    SRC             .req    r2
    VX              .req    r3
    UNIT_X          .req    ip
    TMP1            .req    r4
    TMP2            .req    r5
    VXMASK          .req    r6
    PF_OFFS         .req    r7
    SRC_WIDTH_FIXED .req    r8

    /* first stack argument must be fetched before the push moves sp */
    ldr     UNIT_X, [sp]
    push    {r4, r5, r6, r7, r8, r10}
    /* mask that clears the low bpp_shift bits of a byte offset */
    mvn     VXMASK, #((1 << bpp_shift) - 1)
    /* second stack argument: 24 bytes of saved registers + 4 */
    ldr     SRC_WIDTH_FIXED, [sp, #28]

    /* define helper macro */
    /*
     * Copies two pixels.  On entry TMP1 holds the byte offset of the
     * pixel to copy next; on exit TMP1 holds the offset of the pixel
     * after the two copied ones.  The offset for the following pixel
     * is computed between each load and its store.
     */
    .macro  scale_2_pixels
        ldr&t   TMP1, [SRC, TMP1]
        and     TMP2, VXMASK, VX, asr #(16 - bpp_shift)
        adds    VX, VX, UNIT_X
        str&t   TMP1, [DST], #(1 << bpp_shift)
9:      subpls  VX, VX, SRC_WIDTH_FIXED
        bpl     9b

        ldr&t   TMP2, [SRC, TMP2]
        and     TMP1, VXMASK, VX, asr #(16 - bpp_shift)
        adds    VX, VX, UNIT_X
        str&t   TMP2, [DST], #(1 << bpp_shift)
9:      subpls  VX, VX, SRC_WIDTH_FIXED
        bpl     9b
    .endm

    /* now do the scaling */
    /* prime the pipeline: offset of the first pixel, then advance VX */
    and     TMP1, VXMASK, VX, asr #(16 - bpp_shift)
    adds    VX, VX, UNIT_X
9:  subpls  VX, VX, SRC_WIDTH_FIXED
    bpl     9b
    /* skip the prefetching loop if fewer than 8 + braking pixels remain */
    subs    W, W, #(8 + prefetch_braking_distance)
    blt     2f
    /* calculate prefetch offset */
    /* PF_OFFS = UNIT_X * prefetch_distance + VX */
    mov     PF_OFFS, #prefetch_distance
    mla     PF_OFFS, UNIT_X, PF_OFFS, VX
1:  /* main loop, process 8 pixels per iteration with prefetch */
    pld     [SRC, PF_OFFS, asr #(16 - bpp_shift)]
    /* advance the prefetch position by 8 steps */
    add     PF_OFFS, UNIT_X, lsl #3
    scale_2_pixels
    scale_2_pixels
    scale_2_pixels
    scale_2_pixels
    subs    W, W, #8
    bge     1b
2:
    /* undo the bias applied above and pre-subtract 4 for the next loop */
    subs    W, W, #(4 - 8 - prefetch_braking_distance)
    blt     2f
1:  /* process the remaining pixels */
    scale_2_pixels
    scale_2_pixels
    subs    W, W, #4
    bge     1b
2:
    /* W is negative here; its two low bits still give the leftover count */
    tst     W, #2
    beq     2f
    scale_2_pixels
2:
    tst     W, #1
    ldrne&t TMP1, [SRC, TMP1]
    strne&t TMP1, [DST]
    /* cleanup helper macro */
    .purgem scale_2_pixels
    .unreq  DST
    .unreq  SRC
    .unreq  W
    .unreq  VX
    .unreq  UNIT_X
    .unreq  TMP1
    .unreq  TMP2
    .unreq  VXMASK
    .unreq  PF_OFFS
    .unreq  SRC_WIDTH_FIXED
    /* return */
    pop     {r4, r5, r6, r7, r8, r10}
    bx      lr
.endfunc
.endm

/* 2 bytes per pixel (bpp_shift 1), halfword loads and stores */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32

/* 4 bytes per pixel (bpp_shift 2), word loads and stores (empty suffix) */
generate_nearest_scanline_func \
    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32