/*
 * Copyright © 2012 Raspberry Pi Foundation
 * Copyright © 2012 RISC OS Open Ltd
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of the copyright holders not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission. The copyright holders make no
 * representations about the suitability of this software for any purpose. It
 * is provided "as is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author: Ben Avison (bavison@riscosopen.org)
 *
 */

/*
 * Because the alignment of pixel data to cachelines, and even the number of
 * cachelines per row can vary from row to row, and because of the need to
 * preload each scanline once and only once, this prefetch strategy treats
 * each row of pixels independently. When a pixel row is long enough, there
 * are three distinct phases of prefetch:
 * * an inner loop section, where each time a cacheline of data is
 *   processed, another cacheline is preloaded (the exact distance ahead is
 *   determined empirically using profiling results from lowlevel-blt-bench)
 * * a leading section, where enough cachelines are preloaded to ensure no
 *   cachelines escape being preloaded when the inner loop starts
 * * a trailing section, where a limited number (0 or more) of cachelines
 *   are preloaded to deal with data (if any) that hangs off the end of the
 *   last iteration of the inner loop, plus any trailing bytes that were not
 *   enough to make up one whole iteration of the inner loop
 *
 * There are (in general) three distinct code paths, selected between
 * depending upon how long the pixel row is. If it is long enough that there
 * is at least one iteration of the inner loop (as described above) then
 * this is described as the "wide" case.
If it is shorter than that, but
 * there are still enough bytes output that there is at least one 16-byte-
 * long, 16-byte-aligned write to the destination (the optimum type of
 * write), then this is the "medium" case. If it is not even this long, then
 * this is the "narrow" case, and there is no attempt to align writes to
 * 16-byte boundaries. In the "medium" and "narrow" cases, all the
 * cachelines containing data from the pixel row are prefetched up-front.
 */

/*
 * Determine whether we put the arguments on the stack for debugging.
 */
#undef DEBUG_PARAMS

/*
 * Bit flags for 'generate_composite_function' macro which are used
 * to tune generated functions behavior.
 *
 * The flags come in groups; within each group the zero-valued name spells
 * out the default and the non-zero name(s) select the alternative. Every
 * non-zero flag owns a distinct bit so that flags can be OR-ed together.
 * FLAG_SPILL_LINE_VARS is the union of the _WIDE (16) and _NON_WIDE (32)
 * bits.
 */
.set FLAG_DST_WRITEONLY,             0
.set FLAG_DST_READWRITE,             1
.set FLAG_COND_EXEC,                 0
.set FLAG_BRANCH_OVER,               2
.set FLAG_PROCESS_PRESERVES_PSR,     0
.set FLAG_PROCESS_CORRUPTS_PSR,      4
.set FLAG_PROCESS_DOESNT_STORE,      0
.set FLAG_PROCESS_DOES_STORE,        8 /* usually because it needs to conditionally skip it */
.set FLAG_NO_SPILL_LINE_VARS,        0
.set FLAG_SPILL_LINE_VARS_WIDE,      16
.set FLAG_SPILL_LINE_VARS_NON_WIDE,  32
.set FLAG_SPILL_LINE_VARS,           48
.set FLAG_PROCESS_CORRUPTS_SCRATCH,  0
.set FLAG_PROCESS_PRESERVES_SCRATCH, 64

/*
 * Offset into stack where mask and source pointer/stride can be accessed.
 * With DEBUG_PARAMS defined the offset grows by a further 9 words.
 */
#ifdef DEBUG_PARAMS
.set ARGS_STACK_OFFSET, (9*4+9*4)
#else
.set ARGS_STACK_OFFSET, (9*4)
#endif

/*
 * Constants for selecting preferable prefetch type.
 */
.set PREFETCH_TYPE_NONE,     0
.set PREFETCH_TYPE_STANDARD, 1

/*
 * Definitions of macros for load/store of pixel data.
971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */ 981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro pixldst op, cond=al, numbytes, reg0, reg1, reg2, reg3, base, unaligned=0 1001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if numbytes == 16 1011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if unaligned == 1 1021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond WK®0, [base], #4 1031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond WK®1, [base], #4 1041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond WK®2, [base], #4 1051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond WK®3, [base], #4 1061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 1071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&m&cond&ia base!, {WK®0,WK®1,WK®2,WK®3} 1081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 1091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif numbytes == 8 1101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if unaligned == 1 1111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond WK®0, [base], #4 1121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond WK®1, [base], #4 1131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 1141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&m&cond&ia base!, {WK®0,WK®1} 1151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 1161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif numbytes == 4 1171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond WK®0, [base], #4 1181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif numbytes == 2 1191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond&h WK®0, [base], #2 1201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif numbytes == 1 1211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck op&r&cond&b WK®0, [base], #1 1221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 
1231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .error "unsupported size: numbytes" 1241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 1251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 1261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 1271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro pixst_baseupdated cond, numbytes, reg0, reg1, reg2, reg3, base 1281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if numbytes == 16 1291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck stm&cond&db base, {WK®0,WK®1,WK®2,WK®3} 1301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif numbytes == 8 1311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck stm&cond&db base, {WK®0,WK®1} 1321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif numbytes == 4 1331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck str&cond WK®0, [base, #-4] 1341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif numbytes == 2 1351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck str&cond&h WK®0, [base, #-2] 1361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif numbytes == 1 1371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck str&cond&b WK®0, [base, #-1] 1381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 1391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .error "unsupported size: numbytes" 1401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 1411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 1421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 1431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro pixld cond, numbytes, firstreg, base, unaligned 1441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pixldst ld, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base, unaligned 1451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 1461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 1471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro pixst cond, numbytes, firstreg, base 
1481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (flags) & FLAG_DST_READWRITE 1491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pixst_baseupdated cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base 1501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 1511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pixldst st, cond, numbytes, %(firstreg+0), %(firstreg+1), %(firstreg+2), %(firstreg+3), base 1521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 1531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 1541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 1551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro PF a, x:vararg 1561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_STANDARD) 1571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck a x 1581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 1591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 1601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 1611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 1621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro preload_leading_step1 bpp, ptr, base 1631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/* If the destination is already 16-byte aligned, then we need to preload 1641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * between 0 and prefetch_distance (inclusive) cache lines ahead so there 1651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * are no gaps when the inner loop starts. 
1661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */ 1671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if bpp > 0 1681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bic, ptr, base, #31 1691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set OFFSET, 0 1701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .rept prefetch_distance+1 1711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [ptr, #OFFSET] 1721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set OFFSET, OFFSET+32 1731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endr 1741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 1751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 1761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 1771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro preload_leading_step2 bpp, bpp_shift, ptr, base 1781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/* However, if the destination is not 16-byte aligned, we may need to 1791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * preload more cache lines than that. The question we need to ask is: 1801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * are the bytes corresponding to the leading pixels more than the amount 1811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * by which the source pointer will be rounded down for preloading, and if 1821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * so, by how many cache lines? Effectively, we want to calculate 1831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * leading_bytes = ((-dst)&15)*src_bpp/dst_bpp 1841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * inner_loop_offset = (src+leading_bytes)&31 1851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * extra_needed = leading_bytes - inner_loop_offset 1861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * and test if extra_needed is <= 0, <= 32, or > 32 (where > 32 is only 1871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * possible when there are 4 src bytes for every 1 dst byte). 
1881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */ 1891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if bpp > 0 1901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc base,DST 1911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* The test can be simplified further when preloading the destination */ 1921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF tst, base, #16 1931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF beq, 61f 1941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 1951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if bpp/dst_w_bpp == 4 1961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF add, SCRATCH, base, WK0, lsl #bpp_shift-dst_bpp_shift 1971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF and, SCRATCH, SCRATCH, #31 1981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF rsb, SCRATCH, SCRATCH, WK0, lsl #bpp_shift-dst_bpp_shift 1991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF sub, SCRATCH, SCRATCH, #1 /* so now ranges are -16..-1 / 0..31 / 32..63 */ 2001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF movs, SCRATCH, SCRATCH, #32-6 /* so this sets NC / nc / Nc */ 2011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bcs, 61f 2021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bpl, 60f 2031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [ptr, #32*(prefetch_distance+2)] 2041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 2051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF mov, SCRATCH, base, lsl #32-5 2061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF add, SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift 2071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF rsbs, SCRATCH, SCRATCH, WK0, lsl #32-5+bpp_shift-dst_bpp_shift 2081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bls, 61f 2091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 2101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 2111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck60: PF pld, 
[ptr, #32*(prefetch_distance+1)] 2121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck61: 2131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 2141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 2151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 2161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck#define IS_END_OF_GROUP(INDEX,SIZE) ((SIZE) < 2 || ((INDEX) & ~((INDEX)+1)) & ((SIZE)/2)) 2171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro preload_middle bpp, base, scratch_holds_offset 2181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if bpp > 0 2191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* prefetch distance = 256/bpp, stm distance = 128/dst_w_bpp */ 2201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if IS_END_OF_GROUP(SUBBLOCK,256/128*dst_w_bpp/bpp) 2211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if scratch_holds_offset 2221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [base, SCRATCH] 2231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 2241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bic, SCRATCH, base, #31 2251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [SCRATCH, #32*prefetch_distance] 2261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 2271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 2281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 2291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 2301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 2311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro preload_trailing bpp, bpp_shift, base 2321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if bpp > 0 2331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if bpp*pix_per_block > 256 2341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* Calculations are more complex if more than one fetch per block */ 2351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF and, WK1, base, #31 2361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF add, WK1, WK1, WK0, lsl 
#bpp_shift 2371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF add, WK1, WK1, #32*(bpp*pix_per_block/256-1)*(prefetch_distance+1) 2381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bic, SCRATCH, base, #31 2391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck80: PF pld, [SCRATCH, #32*(prefetch_distance+1)] 2401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF add, SCRATCH, SCRATCH, #32 2411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF subs, WK1, WK1, #32 2421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bhi, 80b 2431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 2441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* If exactly one fetch per block, then we need either 0, 1 or 2 extra preloads */ 2451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF mov, SCRATCH, base, lsl #32-5 2461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF adds, SCRATCH, SCRATCH, X, lsl #32-5+bpp_shift 2471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF adceqs, SCRATCH, SCRATCH, #0 2481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* The instruction above has two effects: ensures Z is only 2491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * set if C was clear (so Z indicates that both shifted quantities 2501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * were 0), and clears C if Z was set (so C indicates that the sum 2511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * of the shifted quantities was greater and not equal to 32) */ 2521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF beq, 82f 2531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bic, SCRATCH, base, #31 2541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bcc, 81f 2551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [SCRATCH, #32*(prefetch_distance+2)] 2561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck81: PF pld, [SCRATCH, #32*(prefetch_distance+1)] 2571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck82: 2581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 
.endif 2591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 2601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 2611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 2621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 2631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro preload_line narrow_case, bpp, bpp_shift, base 2641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/* "narrow_case" - just means that the macro was invoked from the "narrow" 2651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * code path rather than the "medium" one - because in the narrow case, 2661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * the row of pixels is known to output no more than 30 bytes, then 2671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * (assuming the source pixels are no wider than the the destination 2681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * pixels) they cannot possibly straddle more than 2 32-byte cachelines, 2691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * meaning there's no need for a loop. 
2701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * "bpp" - number of bits per pixel in the channel (source, mask or 2711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * destination) that's being preloaded, or 0 if this channel is not used 2721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * for reading 2731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * "bpp_shift" - log2 of ("bpp"/8) (except if "bpp"=0 of course) 2741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * "base" - base address register of channel to preload (SRC, MASK or DST) 2751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */ 2761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if bpp > 0 2771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if narrow_case && (bpp <= dst_w_bpp) 2781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* In these cases, each line for each channel is in either 1 or 2 cache lines */ 2791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bic, WK0, base, #31 2801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [WK0] 2811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF add, WK1, base, X, LSL #bpp_shift 2821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF sub, WK1, WK1, #1 2831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bic, WK1, WK1, #31 2841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF cmp, WK1, WK0 2851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF beq, 90f 2861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [WK1] 2871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck90: 2881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 2891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bic, WK0, base, #31 2901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [WK0] 2911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF add, WK1, base, X, lsl #bpp_shift 2921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF sub, WK1, WK1, #1 2931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bic, WK1, WK1, #31 
2941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF cmp, WK1, WK0 2951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF beq, 92f 2961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck91: PF add, WK0, WK0, #32 2971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF cmp, WK0, WK1 2981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF pld, [WK0] 2991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF bne, 91b 3001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck92: 3011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 3041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 3051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 3061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx 3071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, 0 3081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if decrementx 3091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub&cond X, X, #8*numbytes/dst_w_bpp 3101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_tail cond, numbytes, firstreg 3121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if !((flags) & FLAG_PROCESS_DOES_STORE) 3131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pixst cond, numbytes, firstreg, DST 3141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 3161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 3171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro conditional_process1 cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx 3181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (flags) & FLAG_BRANCH_OVER 
3191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc cond,mi 3201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck bpl 100f 3211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc cond,cs 3231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck bcc 100f 3241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc cond,ne 3261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck beq 100f 3271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck conditional_process1_helper , process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx 3291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck100: 3301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 3311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck conditional_process1_helper cond, process_head, process_tail, numbytes, firstreg, unaligned_src, unaligned_mask, decrementx 3321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 3341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 3351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro conditional_process2 test, cond1, cond2, process_head, process_tail, numbytes1, numbytes2, firstreg1, firstreg2, unaligned_src, unaligned_mask, decrementx 3361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (flags) & (FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE) 3371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* Can't interleave reads and writes */ 3381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck test 3391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck conditional_process1 cond1, process_head, process_tail, numbytes1, firstreg1, unaligned_src, unaligned_mask, decrementx 3401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (flags) & FLAG_PROCESS_CORRUPTS_PSR 
3411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck test 3421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck conditional_process1 cond2, process_head, process_tail, numbytes2, firstreg2, unaligned_src, unaligned_mask, decrementx 3441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 3451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* Can interleave reads and writes for better scheduling */ 3461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck test 3471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_head cond1, numbytes1, firstreg1, unaligned_src, unaligned_mask, 0 3481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_head cond2, numbytes2, firstreg2, unaligned_src, unaligned_mask, 0 3491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if decrementx 3501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub&cond1 X, X, #8*numbytes1/dst_w_bpp 3511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub&cond2 X, X, #8*numbytes2/dst_w_bpp 3521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_tail cond1, numbytes1, firstreg1 3541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_tail cond2, numbytes2, firstreg2 3551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pixst cond1, numbytes1, firstreg1, DST 3561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pixst cond2, numbytes2, firstreg2, DST 3571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 3581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 3591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 3601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 3611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro test_bits_1_0_ptr 3621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck movs SCRATCH, WK0, lsl #32-1 /* C,N = bits 1,0 of DST */ 3631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 3641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 
/*
 * Set the flags from the leading byte count held in WK0 (callers load WK0
 * with the negated destination pointer before use):
 *   C = bit 3, N = bit 2.
 */
.macro test_bits_3_2_ptr
        movs    SCRATCH, WK0, lsl #32-3  /* C,N = bits 3, 2 of WK0 (leading byte count derived from DST) */
.endm

/*
 * Process the 1 to 15 leading bytes needed to bring the destination up to
 * 16-byte alignment, smallest items first.
 *
 *   process_head, process_tail - per-operation macros forwarded to the
 *                                conditional_process helpers
 *
 * The byte sizes handled are 1 and 2 (8bpp only), 2 (16bpp only), then
 * 4 and 8 for every format. X is decremented as pixels are consumed
 * (the last argument of each helper call is 1).
 */
.macro leading_15bytes  process_head, process_tail
        /* On entry, WK0 bits 0-3 = number of bytes until destination is 16-byte aligned */
        /* Use unaligned loads in all cases for simplicity */
 .if dst_w_bpp == 8
        conditional_process2  test_bits_1_0_ptr, mi, cs, process_head, process_tail, 1, 2, 1, 2, 1, 1, 1
 .elseif dst_w_bpp == 16
        test_bits_1_0_ptr
        conditional_process1  cs, process_head, process_tail, 2, 2, 1, 1, 1
 .endif
        conditional_process2  test_bits_3_2_ptr, mi, cs, process_head, process_tail, 4, 8, 1, 2, 1, 1, 1
.endm

/*
 * Set the flags from the remaining pixel count in X, scaled to bytes by
 * dst_bpp_shift: C = bit 3, N = bit 2 of the remaining byte count.
 */
.macro test_bits_3_2_pix
        movs    SCRATCH, X, lsl #dst_bpp_shift+32-3
.endm

/*
 * Set the flags from the lowest bits of the remaining pixel count in X.
 * For 8bpp destinations C = bit 1 and N = bit 0 of X. Otherwise only C is
 * meaningful and holds bit 0 of X (one trailing pixel).
 */
.macro test_bits_1_0_pix
 .if dst_w_bpp == 8
        movs    SCRATCH, X, lsl #dst_bpp_shift+32-1
 .else
        movs    SCRATCH, X, lsr #1
 .endif
.endm

/*
 * Process the final 1 to 15 bytes of a scanline, largest items first:
 * 8 and 4 bytes for every format, then 2 bytes (16bpp) or 2 and 1 bytes
 * (8bpp). X is only tested here, never decremented (the last argument of
 * each helper call is 0).
 *
 *   process_head, process_tail   - per-operation macros
 *   unaligned_src, unaligned_mask - alignment flags forwarded unchanged to
 *                                   the conditional_process helpers
 */
.macro trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
        conditional_process2  test_bits_3_2_pix, cs, mi, process_head, process_tail, 8, 4, 0, 2, unaligned_src, unaligned_mask, 0
 .if dst_w_bpp == 16
        test_bits_1_0_pix
        conditional_process1  cs, process_head, process_tail, 2, 0, unaligned_src, unaligned_mask, 0
 .elseif dst_w_bpp == 8
        conditional_process2  test_bits_1_0_pix, cs, mi, process_head, process_tail, 2, 1, 0, 1, unaligned_src, unaligned_mask, 0
 .endif
.endm


/*
 * Default inner loop for the wide case. Each pass of the loop at label 110
 * handles one block of pix_per_block pixels as a run of 16-byte sub-blocks
 * (pix_per_block*dst_w_bpp/128 of them), issuing preloads between the head
 * and tail of every sub-block. The loop exits when the subtraction of
 * pix_per_block from X borrows.
 *
 *   process_head, process_tail    - per-operation macros
 *   unaligned_src, unaligned_mask - forwarded to process_head
 *   dst_alignment                 - byte offset subtracted from the
 *                                   destination prefetch address
 */
.macro wide_case_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, dst_alignment
110:
 .set SUBBLOCK, 0 /* this is a count of STMs; there can be up to 8 STMs per block */
 .rept pix_per_block*dst_w_bpp/128
        process_head  , 16, 0, unaligned_src, unaligned_mask, 1
  .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
        preload_middle  src_bpp, SRC, 1
  .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH)
        preload_middle  mask_bpp, MASK, 1
  .else
        preload_middle  src_bpp, SRC, 0
        preload_middle  mask_bpp, MASK, 0
  .endif
  .if (dst_r_bpp > 0) && ((SUBBLOCK % 2) == 0)
        /* Because we know that writes are 16-byte aligned, it's relatively easy to ensure that
         * destination prefetches are 32-byte aligned. It's also the easiest channel to offset
         * preloads for, to achieve staggered prefetches for multiple channels, because there are
         * always two STMs per prefetch, so there is always an opposite STM on which to put the
         * preload. Note, no need to BIC the base register here */
        PF  pld,    [DST, #32*prefetch_distance - dst_alignment]
  .endif
        process_tail  , 16, 0
  .if !((flags) & FLAG_PROCESS_DOES_STORE)
        pixst   , 16, 0, DST
  .endif
  .set SUBBLOCK, SUBBLOCK+1
 .endr
        subs    X, X, #pix_per_block
        bhs     110b
.endm

/*
 * Wide-case body for one scanline: run the inner loop, issue the trailing
 * preloads, then finish the line with the medium-case code.
 *
 * When the destination is read, two copies of the inner loop are emitted
 * and bit 4 of DST picks one: dst_alignment is 16 when the bit is clear
 * and 0 when it is set.
 *
 *   process_inner_loop - macro invoked with the same argument list as
 *                        wide_case_inner_loop
 *   exit_label         - branch target once the scanline is complete
 */
.macro wide_case_inner_loop_and_trailing_pixels  process_head, process_tail, process_inner_loop, exit_label, unaligned_src, unaligned_mask
        /* Destination now 16-byte aligned; we have at least one block before we have to stop preloading */
 .if dst_r_bpp > 0
        tst     DST, #16
        bne     111f
        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 16
        b       112f
111:
 .endif
        process_inner_loop  process_head, process_tail, unaligned_src, unaligned_mask, 0
112:
        /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
 .if (src_bpp*pix_per_block > 256) || (mask_bpp*pix_per_block > 256) || (dst_r_bpp*pix_per_block > 256)
        PF  and,    WK0, X, #pix_per_block-1
 .endif
        preload_trailing  src_bpp, src_bpp_shift, SRC
        preload_trailing  mask_bpp, mask_bpp_shift, MASK
        preload_trailing  dst_r_bpp, dst_bpp_shift, DST
        /* Swap the wide-case bias on X for the medium-case one (X minus one 16-byte block) */
        add     X, X, #(prefetch_distance+2)*pix_per_block - 128/dst_w_bpp
        /* The remainder of the line is handled identically to the medium case */
        medium_case_inner_loop_and_trailing_pixels  process_head, process_tail,, exit_label, unaligned_src, unaligned_mask
.endm

/*
 * Medium-case body for one scanline: a plain loop over 16-byte blocks with
 * no preloading, followed by the trailing pixels. X must arrive biased by
 * minus one block (128/dst_w_bpp pixels) so that the loop ends when the
 * subtraction borrows. The third parameter exists only so the signature
 * matches the wide-case macro.
 */
.macro medium_case_inner_loop_and_trailing_pixels  process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask
120:
        process_head  , 16, 0, unaligned_src, unaligned_mask, 0
        process_tail  , 16, 0
 .if !((flags) & FLAG_PROCESS_DOES_STORE)
        pixst   , 16, 0, DST
 .endif
        subs    X, X, #128/dst_w_bpp
        bhs     120b
        /* Trailing pixels */
        tst     X, #128/dst_w_bpp - 1
        beq     exit_label
        trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
.endm

/*
 * Narrow-case body for one scanline: at most one conditional 16-byte block
 * (selected by the matching bit of X) and then the trailing pixels. X is
 * tested but not decremented, and exit_label is not branched to from here.
 */
.macro narrow_case_inner_loop_and_trailing_pixels  process_head, process_tail, unused, exit_label, unaligned_src, unaligned_mask
        tst     X, #16*8/dst_w_bpp
        conditional_process1  ne, process_head, process_tail, 16, 0, unaligned_src, unaligned_mask, 0
        /* Trailing pixels */
        /* In narrow case, it's relatively unlikely to be aligned, so let's do without a branch here */
        trailing_15bytes  process_head, process_tail, unaligned_src, unaligned_mask
.endm

/*
 * Emit up to four specialised copies of "action", one per combination of
 * word-aligned and unaligned SRC and MASK pointers, and dispatch between
 * them at run time. Alignment is only tested for 8bpp and 16bpp channels;
 * other depths always use the aligned variant.
 *
 *   action     - macro invoked as
 *                action process_head, process_tail, process_inner_loop,
 *                       exit_label, unaligned_src, unaligned_mask
 *   exit_label - branched to after every variant except the last one
 *                emitted, which falls through
 */
.macro switch_on_alignment  action, process_head, process_tail, process_inner_loop, exit_label
 /* Note that if we're reading the destination, it's already guaranteed to be aligned at this point */
 .if mask_bpp == 8 || mask_bpp == 16
        tst     MASK, #3
        bne     141f
 .endif
  .if src_bpp == 8 || src_bpp == 16
        tst     SRC, #3
        bne     140f
  .endif
        action  process_head, process_tail, process_inner_loop, exit_label, 0, 0
  .if src_bpp == 8 || src_bpp == 16
        b       exit_label
140:
        action  process_head, process_tail, process_inner_loop, exit_label, 1, 0
  .endif
 .if mask_bpp == 8 || mask_bpp == 16
        b       exit_label
141:
  .if src_bpp == 8 || src_bpp == 16
        tst     SRC, #3
        bne     142f
  .endif
        action  process_head, process_tail, process_inner_loop, exit_label, 0, 1
  .if src_bpp == 8 || src_bpp == 16
        b       exit_label
142:
        action  process_head, process_tail, process_inner_loop, exit_label, 1, 1
  .endif
 .endif
.endm


/*
 * End-of-scanline bookkeeping: reload spilled per-line registers, count
 * down Y, advance the pointers by their strides and loop for the next row.
 *
 *   restore_x    - non-zero to reload X from ORIG_W
 *   vars_spilled - non-zero if LINE_SAVED_REGS were pushed for this case
 *   loop_label   - start of the per-line code, taken while Y has not
 *                  gone below zero
 *   last_one     - leave blank for every case except the final one in the
 *                  function. When blank, an explicit branch to the shared
 *                  exit code (197 or 198) is emitted instead of falling
 *                  through
 */
.macro end_of_line      restore_x, vars_spilled, loop_label, last_one
 .if vars_spilled
        /* Sadly, GAS doesn't seem to have an equivalent of the DCI directive? */
        /* This is ldmia sp,{} */
        .word   0xE89D0000 | LINE_SAVED_REGS
 .endif
        subs    Y, Y, #1
 .if vars_spilled
  .if (LINE_SAVED_REGS) & (1<<1)
        /* Y (r1) is itself spilled, so write the new count back to its
         * stack slot. Registers are stored in ascending order, so the
         * slot is at [sp] unless r0 is spilled as well, in which case r0
         * occupies [sp] and Y lives one word higher. */
   .if (LINE_SAVED_REGS) & (1<<0)
        str     Y, [sp, #4]
   .else
        str     Y, [sp]
   .endif
  .endif
 .endif
        /* None of the instructions below alter the flags set by the subs above */
        add     DST, DST, STRIDE_D
 .if src_bpp > 0
        add     SRC, SRC, STRIDE_S
 .endif
 .if mask_bpp > 0
        add     MASK, MASK, STRIDE_M
 .endif
 .if restore_x
        mov     X, ORIG_W
 .endif
        bhs     loop_label
 .ifc "last_one",""
  .if vars_spilled
        b       197f
  .else
        b       198f
  .endif
 .else
  .if (!vars_spilled) && ((flags) & FLAG_SPILL_LINE_VARS)
        b       198f
  .endif
 .endif
.endm


.macro generate_composite_function fname, \
                                   src_bpp_, \
                                   mask_bpp_, \
                                   dst_w_bpp_, \
                                   flags_, \
                                   prefetch_distance_, \
                                   init, \
                                   newline, \
                                   cleanup, \
                                   process_head, \
                                   process_tail, \
                                   process_inner_loop

 .func fname
 .global fname
 /* For ELF format also set function visibility to hidden */
#ifdef __ELF__
 .hidden fname
 .type fname, %function
#endif
5711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 5721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/* 5731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * Make some macro arguments globally visible and accessible 5741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * from other macros 5751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */ 5761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set src_bpp, src_bpp_ 5771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set mask_bpp, mask_bpp_ 5781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set dst_w_bpp, dst_w_bpp_ 5791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set flags, flags_ 5801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set prefetch_distance, prefetch_distance_ 5811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 5821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/* 5831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * Select prefetch type for this function. 5841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */ 5851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if prefetch_distance == 0 5861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE 5871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 5881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_STANDARD 5891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 5901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 5911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if src_bpp == 32 5921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set src_bpp_shift, 2 5931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif src_bpp == 24 5941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set src_bpp_shift, 0 5951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif src_bpp == 16 5961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set src_bpp_shift, 1 5971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif src_bpp == 8 
5981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set src_bpp_shift, 0 5991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif src_bpp == 0 6001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set src_bpp_shift, -1 6011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 6021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .error "requested src bpp (src_bpp) is not supported" 6031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if mask_bpp == 32 6061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set mask_bpp_shift, 2 6071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif mask_bpp == 24 6081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set mask_bpp_shift, 0 6091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif mask_bpp == 8 6101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set mask_bpp_shift, 0 6111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif mask_bpp == 0 6121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set mask_bpp_shift, -1 6131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 6141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .error "requested mask bpp (mask_bpp) is not supported" 6151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if dst_w_bpp == 32 6181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set dst_bpp_shift, 2 6191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif dst_w_bpp == 24 6201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set dst_bpp_shift, 0 6211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif dst_w_bpp == 16 6221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set dst_bpp_shift, 1 6231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif dst_w_bpp == 8 6241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set dst_bpp_shift, 0 
6251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 6261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .error "requested dst bpp (dst_w_bpp) is not supported" 6271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (((flags) & FLAG_DST_READWRITE) != 0) 6301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set dst_r_bpp, dst_w_bpp 6311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 6321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set dst_r_bpp, 0 6331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set pix_per_block, 16*8/dst_w_bpp 6361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if src_bpp != 0 6371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if 32*8/src_bpp > pix_per_block 6381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set pix_per_block, 32*8/src_bpp 6391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if mask_bpp != 0 6421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if 32*8/mask_bpp > pix_per_block 6431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set pix_per_block, 32*8/mask_bpp 6441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if dst_r_bpp != 0 6471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if 32*8/dst_r_bpp > pix_per_block 6481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set pix_per_block, 32*8/dst_r_bpp 6491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/* The 
standard entry conditions set up by pixman-arm-common.h are: 6531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * r0 = width (pixels) 6541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * r1 = height (rows) 6551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * r2 = pointer to top-left pixel of destination 6561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * r3 = destination stride (pixels) 6571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * [sp] = source pixel value, or pointer to top-left pixel of source 6581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * [sp,#4] = 0 or source stride (pixels) 6591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * The following arguments are unused for non-mask operations 6601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * [sp,#8] = mask pixel value, or pointer to top-left pixel of mask 6611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * [sp,#12] = 0 or mask stride (pixels) 6621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */ 6631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/* 6651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * Assign symbolic names to registers 6661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */ 6671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck X .req r0 /* pixels to go on this line */ 6681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck Y .req r1 /* lines to go */ 6691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck DST .req r2 /* destination pixel pointer */ 6701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck STRIDE_D .req r3 /* destination stride (bytes, minus width) */ 6711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck SRC .req r4 /* source pixel pointer */ 6721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck STRIDE_S .req r5 /* source stride (bytes, minus width) */ 6731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck MASK .req r6 /* mask pixel pointer (if applicable) */ 6741176bdada62cabc6ec4b0308a930e83b679d5d36John 
Reck STRIDE_M .req r7 /* mask stride (bytes, minus width) */ 6751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck WK0 .req r8 /* pixel data registers */ 6761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck WK1 .req r9 6771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck WK2 .req r10 6781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck WK3 .req r11 6791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck SCRATCH .req r12 6801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck ORIG_W .req r14 /* width (pixels) */ 6811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6821176bdada62cabc6ec4b0308a930e83b679d5d36John Reckfname: 6831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck push {r4-r11, lr} /* save all registers */ 6841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck subs Y, Y, #1 6861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck blo 199f 6871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck#ifdef DEBUG_PARAMS 6891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub sp, sp, #9*4 6901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck#endif 6911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 6921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if src_bpp > 0 6931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck ldr SRC, [sp, #ARGS_STACK_OFFSET] 6941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck ldr STRIDE_S, [sp, #ARGS_STACK_OFFSET+4] 6951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 6961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if mask_bpp > 0 6971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck ldr MASK, [sp, #ARGS_STACK_OFFSET+8] 6981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck ldr STRIDE_M, [sp, #ARGS_STACK_OFFSET+12] 6991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck#ifdef DEBUG_PARAMS 
7021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck add Y, Y, #1 7031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck stmia sp, {r0-r7,pc} 7041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub Y, Y, #1 7051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck#endif 7061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck init 7081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck lsl STRIDE_D, #dst_bpp_shift /* stride in bytes */ 7101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub STRIDE_D, STRIDE_D, X, lsl #dst_bpp_shift 7111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if src_bpp > 0 7121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck lsl STRIDE_S, #src_bpp_shift 7131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub STRIDE_S, STRIDE_S, X, lsl #src_bpp_shift 7141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if mask_bpp > 0 7161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck lsl STRIDE_M, #mask_bpp_shift 7171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub STRIDE_M, STRIDE_M, X, lsl #mask_bpp_shift 7181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* Are we not even wide enough to have one 16-byte aligned 16-byte block write? 
*/ 7211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck cmp X, #2*16*8/dst_w_bpp - 1 7221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck blo 170f 7231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if src_bpp || mask_bpp || dst_r_bpp /* Wide and medium cases are the same for fill */ 7241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* To preload ahead on the current line, we need at least (prefetch_distance+2) 32-byte blocks on all prefetch channels */ 7251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck cmp X, #(prefetch_distance+3)*pix_per_block - 1 7261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck blo 160f 7271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* Wide case */ 7291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* Adjust X so that the decrement instruction can also test for 7301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * inner loop termination. We want it to stop when there are 7311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * (prefetch_distance+1) complete blocks to go. 
*/ 7321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub X, X, #(prefetch_distance+2)*pix_per_block 7331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck mov ORIG_W, X 7341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (flags) & FLAG_SPILL_LINE_VARS_WIDE 7351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* This is stmdb sp!,{} */ 7361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .word 0xE92D0000 | LINE_SAVED_REGS 7371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck151: /* New line */ 7391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck newline 7401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_leading_step1 src_bpp, WK1, SRC 7411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_leading_step1 mask_bpp, WK2, MASK 7421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_leading_step1 dst_r_bpp, WK3, DST 7431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck tst DST, #15 7451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck beq 154f 7461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */ 7471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (src_bpp != 0 && src_bpp != 2*dst_w_bpp) || (mask_bpp != 0 && mask_bpp != 2*dst_w_bpp) 7481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck PF and, WK0, WK0, #15 7491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_leading_step2 src_bpp, src_bpp_shift, WK1, SRC 7521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_leading_step2 mask_bpp, mask_bpp_shift, WK2, MASK 7531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_leading_step2 dst_r_bpp, dst_bpp_shift, WK3, DST 7541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 
leading_15bytes process_head, process_tail 7561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck154: /* Destination now 16-byte aligned; we have at least one prefetch on each channel as well as at least one 16-byte output block */ 7581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (src_bpp > 0) && (mask_bpp == 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) 7591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck and SCRATCH, SRC, #31 7601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck rsb SCRATCH, SCRATCH, #32*prefetch_distance 7611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif (src_bpp == 0) && (mask_bpp > 0) && ((flags) & FLAG_PROCESS_PRESERVES_SCRATCH) 7621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck and SCRATCH, MASK, #31 7631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck rsb SCRATCH, SCRATCH, #32*prefetch_distance 7641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc "process_inner_loop","" 7661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, wide_case_inner_loop, 157f 7671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .else 7681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck switch_on_alignment wide_case_inner_loop_and_trailing_pixels, process_head, process_tail, process_inner_loop, 157f 7691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck157: /* Check for another line */ 7721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_WIDE), 151b 7731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ltorg 7761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 
7771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck160: /* Medium case */ 7781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck mov ORIG_W, X 7791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE 7801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* This is stmdb sp!,{} */ 7811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .word 0xE92D0000 | LINE_SAVED_REGS 7821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 7831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck161: /* New line */ 7841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck newline 7851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_line 0, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ 7861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_line 0, mask_bpp, mask_bpp_shift, MASK 7871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_line 0, dst_r_bpp, dst_bpp_shift, DST 7881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck sub X, X, #128/dst_w_bpp /* simplifies inner loop termination */ 7901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck tst DST, #15 7911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck beq 164f 7921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck rsb WK0, DST, #0 /* bits 0-3 = number of leading bytes until destination aligned */ 7931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck leading_15bytes process_head, process_tail 7951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */ 7971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck switch_on_alignment medium_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 167f 7981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 7991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck167: /* Check for another line */ 
8001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck end_of_line 1, %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 161b 8011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ltorg 8031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck170: /* Narrow case, less than 31 bytes, so no guarantee of at least one 16-byte block */ 8051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if dst_w_bpp < 32 8061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck mov ORIG_W, X 8071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (flags) & FLAG_SPILL_LINE_VARS_NON_WIDE 8091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck /* This is stmdb sp!,{} */ 8101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .word 0xE92D0000 | LINE_SAVED_REGS 8111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck171: /* New line */ 8131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck newline 8141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_line 1, src_bpp, src_bpp_shift, SRC /* in: X, corrupts: WK0-WK1 */ 8151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_line 1, mask_bpp, mask_bpp_shift, MASK 8161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck preload_line 1, dst_r_bpp, dst_bpp_shift, DST 8171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if dst_w_bpp == 8 8191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck tst DST, #3 8201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck beq 174f 8211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck172: subs X, X, #1 8221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck blo 177f 8231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_head , 1, 0, 1, 1, 0 8241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_tail , 1, 0 8251176bdada62cabc6ec4b0308a930e83b679d5d36John 
Reck .if !((flags) & FLAG_PROCESS_DOES_STORE) 8261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pixst , 1, 0, DST 8271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck tst DST, #3 8291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck bne 172b 8301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .elseif dst_w_bpp == 16 8311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck tst DST, #2 8321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck beq 174f 8331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck subs X, X, #1 8341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck blo 177f 8351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_head , 2, 0, 1, 1, 0 8361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck process_tail , 2, 0 8371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if !((flags) & FLAG_PROCESS_DOES_STORE) 8381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pixst , 2, 0, DST 8391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */ 8431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck switch_on_alignment narrow_case_inner_loop_and_trailing_pixels, process_head, process_tail,, 177f 8441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck177: /* Check for another line */ 8461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck end_of_line %(dst_w_bpp < 32), %((flags) & FLAG_SPILL_LINE_VARS_NON_WIDE), 171b, last_one 8471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck197: 8491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .if (flags) & FLAG_SPILL_LINE_VARS 8501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck add sp, sp, #LINE_SAVED_REG_COUNT*4 
8511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck198: 8531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck cleanup 8541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck#ifdef DEBUG_PARAMS 8561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck add sp, sp, #9*4 /* junk the debug copy of arguments */ 8571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck#endif 8581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck199: 8591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck pop {r4-r11, pc} /* exit */ 8601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ltorg 8621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq X 8641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq Y 8651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq DST 8661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq STRIDE_D 8671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq SRC 8681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq STRIDE_S 8691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq MASK 8701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq STRIDE_M 8711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq WK0 8721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq WK1 8731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq WK2 8741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq WK3 8751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq SCRATCH 8761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .unreq ORIG_W 8771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endfunc 8781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 8791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 8801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro line_saved_regs x:vararg 8811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 
.set LINE_SAVED_REGS, 0 8821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REG_COUNT, 0 8831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .irp SAVED_REG,x 8841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc "SAVED_REG","Y" 8851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<1) 8861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 8871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc "SAVED_REG","STRIDE_D" 8891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<3) 8901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 8911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc "SAVED_REG","STRIDE_S" 8931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<5) 8941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 8951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 8961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc "SAVED_REG","STRIDE_M" 8971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<7) 8981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 8991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 9001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .ifc "SAVED_REG","ORIG_W" 9011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REGS, LINE_SAVED_REGS | (1<<14) 9021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .set LINE_SAVED_REG_COUNT, LINE_SAVED_REG_COUNT + 1 9031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endif 9041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck .endr 
9051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 9061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck 9071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro nop_macro x:vararg 9081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm 909