/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f) opens a global function symbol f in .text; END(f) records its
 * size.  Every ENTRY must be paired with an END.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
#define END(f) .size f, .-f;

/* Perform the actual YuvToRGB conversion in a macro, from register to
 * register.  This macro will be called from within several different wrapper
 * variants for different data layouts.  Y data starts with the even and odd
 * bytes split into the low parts of v8 and v9 respectively.  U and V are in
 * v16 and v17.  Working constants are pre-loaded into v13-v15, and v3 is
 * pre-loaded with a constant 0xff alpha channel.
 *
 * The complicated arithmetic is the result of refactoring the original
 * equations to avoid 16-bit overflow without losing any precision.
 *
 * Register contract:
 *   in:       v8.8b  = even-numbered Y samples
 *             v9.8b  = odd-numbered Y samples
 *             v16.8b = U, one sample per even/odd pixel pair
 *             v17.8b = V, one sample per even/odd pixel pair
 *             v13.8h, v14.8h, v15.8h = bias constants for R, G and B
 *   out:      v0.16b = R, v1.16b = G, v2.16b = B, 16 pixels in linear order
 *   clobbers: v4-v8, v10-v12 (v8 is reused for the chroma part of green)
 *   v3, v9, v16 and v17 are read or left alone, never written.
 */
.macro yuvkern
        movi        v7.8b, #149

        umull       v1.8h, v8.8b, v7.8b         // g0 = y0 * 149
        umull       v5.8h, v9.8b, v7.8b         // g1 = y1 * 149

        movi        v7.8b, #50
        movi        v10.8b, #104
        umull       v8.8h, v16.8b, v7.8b        // g2 = u * 50 + v * 104
        umlal       v8.8h, v17.8b, v10.8b

        ushr        v7.8b, v17.8b, #1
        uaddw       v0.8h, v1.8h, v7.8b         // r0 = y0 * 149 + (v >> 1)
        uaddw       v4.8h, v5.8h, v7.8b         // r1 = y1 * 149 + (v >> 1)

        ushll       v7.8h, v16.8b, #2
        add         v2.8h, v1.8h, v7.8h         // b0 = y0 * 149 + (u << 2)
        add         v6.8h, v5.8h, v7.8h         // b1 = y1 * 149 + (u << 2)

        movi        v7.16b, #204
        movi        v10.8b, #254
        umull       v11.8h, v17.8b, v7.8b       // r2 = v * 204
        umull       v12.8h, v16.8b, v10.8b      // b2 = u * 254

        uhadd       v0.8h, v0.8h, v11.8h        // r0 = (r0 + r2) >> 1
        uhadd       v4.8h, v4.8h, v11.8h        // r1 = (r1 + r2) >> 1
        uqadd       v1.8h, v1.8h, v14.8h        // g0 = satu16(g0 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uqadd       v5.8h, v5.8h, v14.8h        // g1 = satu16(g1 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uhadd       v2.8h, v2.8h, v12.8h        // b0 = (b0 + b2) >> 1
        uhadd       v6.8h, v6.8h, v12.8h        // b1 = (b1 + b2) >> 1

        uqsub       v0.8h, v0.8h, v13.8h        // r0 = satu16(r0 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v4.8h, v4.8h, v13.8h        // r1 = satu16(r1 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v1.8h, v1.8h, v8.8h         // g0 = satu16(g0 - g2)
        uqsub       v5.8h, v5.8h, v8.8h         // g1 = satu16(g1 - g2)
        uqsub       v2.8h, v2.8h, v15.8h        // b0 = satu16(b0 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
        uqsub       v6.8h, v6.8h, v15.8h        // b1 = satu16(b1 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)

        /* Round, narrow and saturate to 8 bits.  R and B were already halved
         * by uhadd above, so they shift by one bit less than G.
         */
        uqrshrn     v0.8b, v0.8h, #6
        uqrshrn     v4.8b, v4.8h, #6
        uqrshrn     v1.8b, v1.8h, #7
        uqrshrn     v5.8b, v5.8h, #7
        uqrshrn     v2.8b, v2.8h, #6
        uqrshrn     v6.8b, v6.8h, #6

        /* Re-interleave the even and odd results into linear pixel order. */
        zip1        v0.16b, v0.16b, v4.16b
        zip1        v1.16b, v1.16b, v5.16b
        zip1        v2.16b, v2.16b, v6.16b
.endm

/* Define the wrapper code which will load and store the data, iterate the
 * correct number of times, and safely handle the remainder at the end of the
 * loop.  Some sections of code are switched out depending on the data packing
 * being handled.
 *
 * Arguments:
 *   kernel      - macro to invoke on each batch of 16 pixels
 *   interleaved - 0: U is read from x3 and V from x4 (separate planes)
 *                 1: chroma is read from x3 as alternating byte pairs
 *   swapuv      - interleaved only; 0: first byte of each pair is U,
 *                 1: first byte of each pair is V
 *
 * Registers on entry:
 *   x0 = output pointer (4 bytes per pixel are written: R, G, B, 0xff)
 *   x1 = Y pointer
 *   x2 = number of pixels to convert
 *   x3 = U pointer, or the interleaved chroma pointer
 *   x4 = V pointer (only read when interleaved == 0)
 * Clobbers w5, v0-v17, and v18 as well when interleaved && swapuv.
 */
.macro wrap_line kernel, interleaved=0, swapuv=0

        mov         w5, #((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        dup         v13.8h, w5
        mov         w5, #((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        dup         v14.8h, w5
        mov         w5, #((16 * 149 + (128 << 2) + 128 * 254) >> 1)
        dup         v15.8h, w5

        movi        v3.16b, #0xff

        subs        x2, x2, #16
        bhs         1f
        b           2f

        .align 4
1:      ld2         {v8.8b,v9.8b}, [x1], #16
//      prfm PLDL1STRM, [x1, #256]
  .if \interleaved
    .if \swapuv
        ld2         {v17.8b,v18.8b}, [x3], #16
        mov         v16.8b, v18.8b
    .else
        ld2         {v16.8b,v17.8b}, [x3], #16
    .endif
//      prfm PLDL1STRM, [x3, #256]
  .else
        ld1         {v16.8b}, [x3], #8
        ld1         {v17.8b}, [x4], #8
//      prfm PLDL1STRM, [x3, #128]
//      prfm PLDL1STRM, [x4, #128]
  .endif

        \kernel

        subs        x2, x2, #16

        st4         {v0.16b,v1.16b,v2.16b,v3.16b}, [x0], #64

        bhs         1b

2:      adds        x2, x2, #16
        beq         2f

        /* To handle the tail portion of the data (something less than 16
         * bytes) load small power-of-two chunks into working registers.  It
         * doesn't matter where they end up in the register; the same process
         * will store them back out using the same positions and the
         * interaction between neighbouring pixels is constrained to odd
         * boundaries where the load operations don't interfere.
         *
         * Y is gathered as one linear 16-byte vector split across v8 (slots
         * 0-7) and v9 (slots 8-15).  Interleaved chroma is gathered the same
         * way across v16 and v17.  Planar chroma goes straight into the lane
         * of v16/v17 that pairs with the Y slots loaded in the same step.
         */
        movi        v8.8b, #0
        movi        v9.8b, #0
        movi        v16.8b, #0
        movi        v17.8b, #0

        tbz         x2, #3, 1f
        ld1         {v9.8b}, [x1], #8
  .if \interleaved
        ld1         {v17.8b}, [x3], #8
  .else
        ld1         {v16.s}[1], [x3], #4
        ld1         {v17.s}[1], [x4], #4
  .endif
1:      tbz         x2, #2, 1f
        ld1         {v8.s}[1], [x1], #4
  .if \interleaved
        ld1         {v16.s}[1], [x3], #4
  .else
        ld1         {v16.h}[1], [x3], #2
        ld1         {v17.h}[1], [x4], #2
  .endif
1:      tbz         x2, #1, 1f
        ld1         {v8.h}[1], [x1], #2
  .if \interleaved
        ld1         {v16.h}[1], [x3], #2
  .else
        ld1         {v16.b}[1], [x3], #1
        ld1         {v17.b}[1], [x4], #1
  .endif
1:      tbz         x2, #0, 1f
        ld1         {v8.b}[1], [x1], #1
  .if \interleaved
        ld1         {v16.h}[0], [x3], #2
  .else
        ld1         {v16.b}[0], [x3], #1
        ld1         {v17.b}[0], [x4], #1
  .endif

        /* One small impediment in the process above is that some of the load
         * operations can't perform byte-wise structure deinterleaving at the
         * same time as loading only part of a register.  So the data is loaded
         * linearly and unpacked manually at this point if necessary.
         *
         * The two 8-byte halves are first merged into a single 16-byte
         * vector, then split into even bytes (uzp1) and odd bytes (uzp2).
         * This reproduces the layout that ld2 gives the kernel in the main
         * loop: even Y in v8, odd Y in v9, and for interleaved chroma the
         * first byte of each pair in v16 (v17 when swapuv is set) and the
         * second byte in the other register.
         */
1:      mov         v8.d[1], v9.d[0]
        uzp2        v9.16b, v8.16b, v8.16b
        uzp1        v8.16b, v8.16b, v8.16b
  .if \interleaved
        mov         v16.d[1], v17.d[0]
    .if \swapuv
        uzp1        v17.16b, v16.16b, v16.16b
        uzp2        v16.16b, v16.16b, v16.16b
    .else
        uzp2        v17.16b, v16.16b, v16.16b
        uzp1        v16.16b, v16.16b, v16.16b
    .endif
  .endif

        \kernel

        /* As above but with the output; structured stores for partial vectors
         * aren't available, so the data is re-packed first and stored linearly.
         * After the zips v0-v3 hold pixels 0-3, 4-7, 8-11 and 12-15 as
         * R, G, B, A byte quadruplets.
         */
        zip1        v4.16b, v0.16b, v2.16b
        zip2        v6.16b, v0.16b, v2.16b
        zip1        v5.16b, v1.16b, v3.16b
        zip2        v7.16b, v1.16b, v3.16b
        zip1        v0.16b, v4.16b, v5.16b
        zip2        v1.16b, v4.16b, v5.16b
        zip1        v2.16b, v6.16b, v7.16b
        zip2        v3.16b, v6.16b, v7.16b

        /* Store from the same slots the inputs were loaded into. */
1:      tbz         x2, #3, 1f
        st1         {v2.16b,v3.16b}, [x0], #32
1:      tbz         x2, #2, 1f
        st1         {v1.16b}, [x0], #16
1:      tbz         x2, #1, 1f
        st1         {v0.d}[1], [x0], #8
1:      tbz         x2, #0, 2f
        st1         {v0.s}[1], [x0], #4
2:
.endm


/*  void rsdIntrinsicYuv2_K(
 *          void *out,          // x0
 *          void const *yin,    // x1
 *          void const *uin,    // x2
 *          void const *vin,    // x3
 *          size_t xstart,      // x4
 *          size_t xend);       // x5
 *
 * Planar variant: U and V are read from separate buffers, one byte of each
 * per pair of pixels.
 */
ENTRY(rsdIntrinsicYuv2_K)
        lsr         x6, x4, #1                  // x6 = chroma index of xstart
        add         x0, x0, x4, LSL #2          // out += xstart * 4
        add         x1, x1, x4                  // yin += xstart
        add         x4, x3, x6                  // x4 = vin + xstart / 2
        add         x3, x2, x6                  // x3 = uin + xstart / 2
        /* Pixel count = xend - (xstart & ~1), the same expression the
         * interleaved entry points below use.  x6 holds xstart / 2, so it is
         * scaled by two here, not four.
         * NOTE(review): out and yin advance by the unrounded xstart while the
         * count uses the rounded value, so an odd xstart would disagree by one
         * pixel -- confirm whether callers can pass an odd xstart.
         */
        sub         x2, x5, x6, LSL #1

        /* Save the low halves of v8-v15 below the incoming stack pointer;
         * wrap_line and the kernel overwrite v8-v15.
         */
        sub         x6, sp, #32
        sub         sp, sp, #64
        st1         {v8.1d - v11.1d}, [sp]
        st1         {v12.1d - v15.1d}, [x6]

        wrap_line yuvkern, 0

        ld1         {v8.1d - v11.1d}, [sp], #32
        ld1         {v12.1d - v15.1d}, [sp], #32
        ret
END(rsdIntrinsicYuv2_K)

/*  void rsdIntrinsicYuv_K(
 *          void *out,          // x0
 *          void const *yin,    // x1
 *          void const *uvin,   // x2
 *          size_t xstart,      // x3
 *          size_t xend);       // x4
 *
 * Interleaved-chroma variant with swapuv set: the first byte of each chroma
 * pair is used as V and the second as U.
 */
ENTRY(rsdIntrinsicYuv_K)
        bic         x5, x3, #1                  // x5 = xstart rounded down to even
        add         x0, x0, x5, LSL #2          // out += x5 * 4
        add         x1, x1, x5                  // yin += x5
        add         x3, x2, x5                  // x3 = uvin + x5 (2 bytes per pixel pair)
        sub         x2, x4, x5                  // x2 = pixel count

        /* Save the low halves of v8-v15 below the incoming stack pointer;
         * wrap_line and the kernel overwrite v8-v15.
         */
        sub         x5, sp, #32
        sub         sp, sp, #64
        st1         {v8.1d - v11.1d}, [sp]
        st1         {v12.1d - v15.1d}, [x5]

        wrap_line yuvkern, 1, 1

        ld1         {v8.1d - v11.1d}, [sp], #32
        ld1         {v12.1d - v15.1d}, [sp], #32
        ret
END(rsdIntrinsicYuv_K)

/*  void rsdIntrinsicYuvR_K(
 *          void *out,          // x0
 *          void const *yin,    // x1
 *          void const *uvin,   // x2
 *          size_t xstart,      // x3
 *          size_t xend);       // x4
 *
 * Interleaved-chroma variant with swapuv clear: the first byte of each chroma
 * pair is used as U and the second as V.
 */
ENTRY(rsdIntrinsicYuvR_K)
        bic         x5, x3, #1                  // x5 = xstart rounded down to even
        add         x0, x0, x5, LSL #2          // out += x5 * 4
        add         x1, x1, x5                  // yin += x5
        add         x3, x2, x5                  // x3 = uvin + x5 (2 bytes per pixel pair)
        sub         x2, x4, x5                  // x2 = pixel count

        /* Save the low halves of v8-v15 below the incoming stack pointer;
         * wrap_line and the kernel overwrite v8-v15.
         */
        sub         x5, sp, #32
        sub         sp, sp, #64
        st1         {v8.1d - v11.1d}, [sp]
        st1         {v12.1d - v15.1d}, [x5]

        wrap_line yuvkern, 1

        ld1         {v8.1d - v11.1d}, [sp], #32
        ld1         {v12.1d - v15.1d}, [sp], #32
        ret
END(rsdIntrinsicYuvR_K)