/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f) opens a global, 16-byte aligned function symbol `f` in .text and
 * starts its unwind region; END(f) closes the unwind region and records the
 * symbol size.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
#define END(f) .fnend; .size f, .-f;

.eabi_attribute 25,1 @Tag_ABI_align8_preserved
.arm

/* Perform the actual YuvToRGB conversion in a macro, from register to
 * register.  This macro will be called from within several different wrapper
 * variants for different data layouts.  Y data starts in q8, but with the even
 * and odd bytes split into d16 and d17 respectively.  U and V are in d20
 * and d21.  Working constants are pre-loaded into q13-q15, and q3 is
 * pre-loaded with a constant 0xff alpha channel.
 *
 * The complicated arithmetic is the result of refactoring the original
 * equations to avoid 16-bit overflow without losing any precision.
 *
 * On exit q0, q1 and q2 each hold 16 bytes of red, green and blue
 * respectively, in pixel order.  The macro overwrites q0-q2, q4-q8, q11 and
 * q12 (so the Y input in q8 is destroyed); q3, q10 and q13-q15 are only read
 * or left untouched.
 */
.macro yuvkern
        vmov.i8     d15, #149

        vmull.u8    q1, d16, d15        // g0 = y0 * 149
        vmull.u8    q5, d17, d15        // g1 = y1 * 149

        vmov.i8     d14, #50
        vmov.i8     d15, #104
        vmull.u8    q8, d20, d14        // g2 = u * 50 + v * 104
        vmlal.u8    q8, d21, d15

        vshr.u8     d14, d21, #1
        vaddw.u8    q0, q1, d14         // r0 = y0 * 149 + (v >> 1)
        vaddw.u8    q4, q5, d14         // r1 = y1 * 149 + (v >> 1)

        vshll.u8    q7, d20, #2
        vadd.u16    q2, q1, q7          // b0 = y0 * 149 + (u << 2)
        vadd.u16    q6, q5, q7          // b1 = y1 * 149 + (u << 2)

        vmov.i8     d14, #204
        vmov.i8     d15, #254
        vmull.u8    q11, d21, d14       // r2 = v * 204
        vmull.u8    q12, d20, d15       // b2 = u * 254

        vhadd.u16   q0, q11             // r0 = (r0 + r2) >> 1
        vhadd.u16   q4, q11             // r1 = (r1 + r2) >> 1
        vqadd.u16   q1, q14             // g0 = satu16(g0 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        vqadd.u16   q5, q14             // g1 = satu16(g1 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        vhadd.u16   q2, q12             // b0 = (b0 + b2) >> 1
        vhadd.u16   q6, q12             // b1 = (b1 + b2) >> 1

        vqsub.u16   q0, q13             // r0 = satu16(r0 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        vqsub.u16   q4, q13             // r1 = satu16(r1 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        vqsub.u16   q1, q8              // g0 = satu16(g0 - g2)
        vqsub.u16   q5, q8              // g1 = satu16(g1 - g2)
        vqsub.u16   q2, q15             // b0 = satu16(b0 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
        vqsub.u16   q6, q15             // b1 = satu16(b1 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)

        /* Narrow back to 8 bits with rounding and saturation.  Red and blue
         * were already halved by vhadd, so they shift by 6 where green shifts
         * by 7.  The results are packed as d0=r0, d1=g0, d2=r1, d3=g1,
         * d4=b0, d5=b1 (0 = even pixels, 1 = odd pixels).
         */
        vqrshrn.u16 d0, q0, #6
        vqrshrn.u16 d1, q1, #7
        vqrshrn.u16 d2, q4, #6
        vqrshrn.u16 d3, q5, #7
        vqrshrn.u16 d4, q2, #6
        vqrshrn.u16 d5, q6, #6

        /* Re-interleave even and odd pixels: q0 becomes red, q1 green and
         * q2 blue, each in pixel order.
         */
        vzip.u8     q0, q1
        vzip.u8     d4, d5
.endm

/* Define the wrapper code which will load and store the data, iterate the
 * correct number of times, and safely handle the remainder at the end of the
 * loop.  Some sections of code are switched out depending on the data packing
 * being handled.
 *
 * Register usage on entry:
 *      r0 = output pointer (advanced by 4 bytes per pixel)
 *      r1 = Y pointer
 *      r2 = number of pixels to convert
 *      r3 = U pointer (interleaved=0) or interleaved chroma pointer
 *      r4 = V pointer (only used when interleaved=0)
 *      r5 = scratch, overwritten while loading constants
 *
 * Macro arguments:
 *      kernel      = conversion macro to invoke on the loaded registers
 *      interleaved = non-zero if chroma is stored as byte pairs at [r3]
 *      swapuv      = non-zero to swap d20 and d21 after de-interleaving, so
 *                    the first byte of each chroma pair is used as V
 */
.macro wrap_line kernel, interleaved=0, swapuv=0

        /* Bias constants consumed by the kernel via q13-q15. */
        movw        r5, #((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        vdup.i16    q13, r5
        movw        r5, #((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        vdup.i16    q14, r5
        movw        r5, #((16 * 149 + (128 << 2) + 128 * 254) >> 1)
        vdup.i16    q15, r5

        /* Constant alpha channel. */
        vmov.i8     q3, #0xff

        /* Skip the main loop entirely if fewer than 16 pixels remain. */
        subs        r2, #16
        bhs         1f
        b           2f

        /* Main loop: 16 pixels per iteration. */
        .align 4
1:      vld2.u8     {d16,d17}, [r1]!
        pld         [r1, #256]
  .if \interleaved
        vld2.u8     {d20,d21}, [r3]!
    .if \swapuv
        vswp        d20, d21
    .endif
        pld         [r3, #256]
  .else
        vld1.u8     d20, [r3]!
        vld1.u8     d21, [r4]!
        pld         [r3, #128]
        pld         [r4, #128]
  .endif

        \kernel

        /* The NEON stores below do not alter the flags, so the bhs after
         * them still tests the result of this subtraction.
         */
        subs        r2, #16

        vst4.u8     {d0,d2,d4,d6}, [r0]!
        vst4.u8     {d1,d3,d5,d7}, [r0]!

        bhs         1b

        /* Restore the remaining pixel count (0..15); done if it is zero. */
2:      adds        r2, #16
        beq         2f

        /* To handle the tail portion of the data (something less than 16
         * bytes) load small power-of-two chunks into working registers.  It
         * doesn't matter where they end up in the register; the same process
         * will store them back out using the same positions and the
         * interaction between neighbouring pixels is constrained to odd
         * boundaries where the load operations don't interfere.
         */
        vmov.i8     q8, #0
        vmov.i8     q10, #0

        tst         r2, #8
        beq         1f
        vld1.u8     d17, [r1]!
  .if \interleaved
        vld1.u8     d21, [r3]!
  .else
        vld1.u32    d20[1], [r3]!
        vld1.u32    d21[1], [r4]!
  .endif

1:      tst         r2, #4
        beq         1f
        vld1.u32    d16[1], [r1]!
  .if \interleaved
        vld1.u32    d20[1], [r3]!
  .else
        vld1.u16    d20[1], [r3]!
        vld1.u16    d21[1], [r4]!
  .endif
1:      tst         r2, #2
        beq         1f
        vld1.u16    d16[1], [r1]!
  .if \interleaved
        vld1.u16    d20[1], [r3]!
  .else
        vld1.u8     d20[1], [r3]!
        vld1.u8     d21[1], [r4]!
  .endif
1:      tst         r2, #1
        beq         1f
        vld1.u8     d16[1], [r1]!
  .if \interleaved
        vld1.u16    d20[0], [r3]!
  .else
        vld1.u8     d20[0], [r3]!
        vld1.u8     d21[0], [r4]!
  .endif

        /* One small impediment in the process above is that some of the load
         * operations can't perform byte-wise structure deinterleaving at the
         * same time as loading only part of a register.  So the data is loaded
         * linearly and unpacked manually at this point if necessary.
         */
1:      vuzp.8      d16, d17
  .if \interleaved
        vuzp.8      d20, d21
    .if \swapuv
        vswp        d20, d21
    .endif
  .endif

        \kernel

        /* As above but with the output; structured stores for partial vectors
         * aren't available, so the data is re-packed first and stored linearly.
         * After these four zips each 32-bit lane of q0-q3 holds one R,G,B,A
         * pixel, with pixel i in lane i.
         */
        vzip.8  q0, q2
        vzip.8  q1, q3
        vzip.8  q0, q1
        vzip.8  q2, q3

        /* Store from the same lane positions the tail loads filled. */
1:      tst         r2, #8
        beq         1f
        vst1.u8     {d4,d5,d6,d7}, [r0]!

1:      tst         r2, #4
        beq         1f
        vst1.u8     {d2,d3}, [r0]!
1:      tst         r2, #2
        beq         1f
        vst1.u8     d1, [r0]!
1:      tst         r2, #1
        beq         2f
        vst1.u32    d0[1], [r0]!
2:
.endm


/*  void rsdIntrinsicYuv2_K(
 *          void *out,          // r0
 *          void const *yin,    // r1
 *          void const *uin,    // r2
 *          void const *vin,    // r3
 *          size_t xstart,      // [sp]
 *          size_t xend);       // [sp+#4]
 *
 * Converts pixels [xstart, xend) of one row using separate U and V planes.
 *
 * NOTE(review): unlike the interleaved entry points below, xstart is not
 * rounded down to an even value here, while the chroma pointers advance by
 * xstart >> 1.  Presumably callers always pass an even xstart -- confirm.
 */
ENTRY(rsdIntrinsicYuv2_K)
        push        {r4,r5}
        ldr         r5, [sp, #8]        // xstart (stack args sit above the two pushed words)
        mov         r4, r3              // r4 = vin
        mov         r3, r2              // r3 = uin
        ldr         r2, [sp, #12]       // xend

        add         r0, r5, LSL #2      // out += xstart * 4
        add         r1, r5              // yin += xstart
        add         r3, r5, LSR #1      // uin += xstart / 2
        add         r4, r5, LSR #1      // vin += xstart / 2
        sub         r2, r5              // r2 = pixel count

        /* The kernel writes q4-q7 (d8-d15), so preserve them. */
        vpush       {d8-d15}

        wrap_line yuvkern, 0

        vpop        {d8-d15}
        pop         {r4,r5}
        bx lr
END(rsdIntrinsicYuv2_K)

/*  void rsdIntrinsicYuv_K(
 *          void *out,          // r0
 *          void const *yin,    // r1
 *          void const *uvin,   // r2
 *          size_t xstart,      // r3
 *          size_t xend);       // [sp]
 *
 * Converts one row using interleaved chroma, with the two bytes of each
 * chroma pair swapped before conversion (swapuv=1).  xstart is rounded down
 * to an even value, so conversion may begin one pixel before xstart.
 */
ENTRY(rsdIntrinsicYuv_K)
        push        {r4,r5}
        bic         r4, r3, #1          // r4 = xstart rounded down to even
        add         r3, r2, r4          // r3 = uvin + r4
        ldr         r2, [sp, #8]        // xend

        add         r0, r4, LSL #2      // out += r4 * 4
        add         r1, r4              // yin += r4
        sub         r2, r4              // r2 = pixel count

        /* The kernel writes q4-q7 (d8-d15), so preserve them. */
        vpush       {d8-d15}

        wrap_line yuvkern, 1, 1

        vpop        {d8-d15}
        pop         {r4,r5}
        bx lr
END(rsdIntrinsicYuv_K)

/*  void rsdIntrinsicYuvR_K(
 *          void *out,          // r0
 *          void const *yin,    // r1
 *          void const *uvin,   // r2
 *          size_t xstart,      // r3
 *          size_t xend);       // [sp]
 *
 * Same as rsdIntrinsicYuv_K but without swapping the bytes of each chroma
 * pair (swapuv=0).
 */
ENTRY(rsdIntrinsicYuvR_K)
        push        {r4,r5}
        bic         r4, r3, #1          // r4 = xstart rounded down to even
        add         r3, r2, r4          // r3 = uvin + r4
        ldr         r2, [sp, #8]        // xend

        add         r0, r4, LSL #2      // out += r4 * 4
        add         r1, r4              // yin += r4
        sub         r2, r4              // r2 = pixel count

        /* The kernel writes q4-q7 (d8-d15), so preserve them. */
        vpush       {d8-d15}

        wrap_line yuvkern, 1

        vpop        {d8-d15}
        pop         {r4,r5}
        bx lr
END(rsdIntrinsicYuvR_K)