/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f) switches to .text, aligns (on ARM the .align operand is a power of
 * two, so this is a 16-byte boundary), exports f as a global function symbol,
 * defines the label and opens its unwind region.  END(f) closes the unwind
 * region and records the size of the symbol.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
#define END(f) .fnend; .size f, .-f;

.eabi_attribute 25,1 @Tag_ABI_align8_preserved
/* Everything below is assembled as ARM (not Thumb) instructions. */
.arm

/* Perform the actual YuvToRGB conversion in a macro, from register to
 * register.  This macro will be called from within several different wrapper
 * variants for different data layouts.
 *
 * Inputs:
 *   d16, d17  - Y for the even and odd pixels respectively (i.e. q8 after a
 *               byte-wise de-interleave)
 *   d20, d21  - U and V; each sample is shared by one even/odd pixel pair
 *   q13-q15   - 16-bit bias constants, pre-loaded by the wrapper
 *   q3        - constant 0xff alpha channel; not touched by this macro
 *
 * Outputs:
 *   q0, q1    - R and G for all 16 pixels, in pixel order
 *   d4, d5    - B for all 16 pixels, in pixel order
 *
 * Clobbers q4-q8, q11 and q12 in addition to the outputs (d14/d15, i.e. q7,
 * hold the 8-bit multiplier constants).
 *
 * The complicated arithmetic is the result of refactoring the original
 * equations to avoid 16-bit overflow without losing any precision.
 */
.macro yuvkern
        vmov.i8     d15, #149

        vmull.u8    q1, d16, d15        // g0 = y0 * 149
        vmull.u8    q5, d17, d15        // g1 = y1 * 149

        vmov.i8     d14, #50
        vmov.i8     d15, #104
        vmull.u8    q8, d20, d14        // g2 = u * 50 + v * 104
        vmlal.u8    q8, d21, d15

        vshr.u8     d14, d21, #1
        vaddw.u8    q0, q1, d14         // r0 = y0 * 149 + (v >> 1)
        vaddw.u8    q4, q5, d14         // r1 = y1 * 149 + (v >> 1)

        vshll.u8    q7, d20, #2
        vadd.u16    q2, q1, q7          // b0 = y0 * 149 + (u << 2)
        vadd.u16    q6, q5, q7          // b1 = y1 * 149 + (u << 2)

        vmov.i8     d14, #204
        vmov.i8     d15, #254
        vmull.u8    q11, d21, d14       // r2 = v * 204
        vmull.u8    q12, d20, d15       // b2 = u * 254

        vhadd.u16   q0, q11             // r0 = (r0 + r2) >> 1
        vhadd.u16   q4, q11             // r1 = (r1 + r2) >> 1
        vqadd.u16   q1, q14             // g0 = satu16(g0 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        vqadd.u16   q5, q14             // g1 = satu16(g1 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        vhadd.u16   q2, q12             // b0 = (b0 + b2) >> 1
        vhadd.u16   q6, q12             // b1 = (b1 + b2) >> 1

        vqsub.u16   q0, q13             // r0 = satu16(r0 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        vqsub.u16   q4, q13             // r1 = satu16(r1 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        vqsub.u16   q1, q8              // g0 = satu16(g0 - g2)
        vqsub.u16   q5, q8              // g1 = satu16(g1 - g2)
        vqsub.u16   q2, q15             // b0 = satu16(b0 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
        vqsub.u16   q6, q15             // b1 = satu16(b1 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)

        /* Narrow to 8 bits with rounding and saturation.  R and B were
         * already halved by the vhadd above, so they need one bit less of
         * shift than G.
         */
        vqrshrn.u16 d0, q0, #6
        vqrshrn.u16 d1, q1, #7
        vqrshrn.u16 d2, q4, #6
        vqrshrn.u16 d3, q5, #7
        vqrshrn.u16 d4, q2, #6
        vqrshrn.u16 d5, q6, #6

        /* Merge the even and odd pixels back into pixel order. */
        vzip.u8     q0, q1              // q0 = R[0..15], q1 = G[0..15]
        vzip.u8     d4, d5              // d4:d5 = B[0..15]
.endm

/* Define the wrapper code which will load and store the data, iterate the
 * correct number of times, and safely handle the remainder at the end of the
 * loop.  Some sections of code are switched out depending on the data packing
 * being handled.
 *
 * Macro arguments:
 *   kernel      - name of the conversion macro to expand (e.g. yuvkern)
 *   interleaved - non-zero if U and V are byte-interleaved in a single plane
 *                 at r3; zero if they are separate planes at r3 (U) and
 *                 r4 (V)
 *   swapuv      - interleaved only: non-zero if the odd bytes are U and the
 *                 even bytes are V, zero for the opposite order
 *
 * Registers on entry:
 *   r0 - output pointer; four bytes (R, G, B, 0xff) are written per pixel
 *   r1 - Y input pointer, one byte per pixel
 *   r2 - number of pixels to convert
 *   r3 - U (planar) or UV (interleaved) input pointer
 *   r4 - V input pointer (planar only)
 *
 * r0-r4 are modified and r5 is used as scratch.  On the NEON side q3, q8,
 * q10 and q13-q15 are written here, in addition to whatever the kernel
 * itself clobbers.
 */
.macro wrap_line kernel, interleaved=0, swapuv=0

        /* Bias constants consumed by the kernel: q13 for R, q14 for G and
         * q15 for B.
         */
        movw        r5, #((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        vdup.i16    q13, r5
        movw        r5, #((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        vdup.i16    q14, r5
        movw        r5, #((16 * 149 + (128 << 2) + 128 * 254) >> 1)
        vdup.i16    q15, r5

        vmov.i8     q3, #0xff           // constant alpha channel

        /* r2 is kept biased by -16 while in the main loop, so that the carry
         * flag from subs says whether another full 16-pixel block remains.
         */
        subs        r2, #16
        bhs         1f
        b           2f                  // fewer than 16 pixels: tail only

        .align 4
        /* Main loop: 16 pixels per iteration. */
1:      vld2.u8     {d16,d17}, [r1]!
        pld         [r1, #256]
  .if \interleaved
        vld2.u8     {d20,d21}, [r3]!
    .if \swapuv
        vswp        d20, d21
    .endif
        pld         [r3, #256]
  .else
        vld1.u8     d20, [r3]!
        vld1.u8     d21, [r4]!
        pld         [r3, #128]
        pld         [r4, #128]
  .endif

        \kernel

        /* The counter is updated ahead of the stores; the NEON stores leave
         * the flags alone, so bhs below still sees the result of this subs.
         */
        subs        r2, #16

        vst4.u8     {d0,d2,d4,d6}, [r0]!    // pixels 0-7 as RGBA
        vst4.u8     {d1,d3,d5,d7}, [r0]!    // pixels 8-15 as RGBA

        bhs         1b

        /* Undo the bias; r2 is now the number of leftover pixels (0-15). */
2:      adds        r2, #16
        beq         2f

        /* To handle the tail portion of the data (something less than 16
         * bytes) load small power-of-two chunks into working registers.  It
         * doesn't matter where they end up in the register; the same process
         * will store them back out using the same positions and the
         * interaction between neighbouring pixels is constrained to odd
         * boundaries where the load operations don't interfere.
         */
        vmov.i8     q8, #0
        vmov.i8     q10, #0

        /* Each chunk of 2^k pixels goes into the lane at offset 2^k, so the
         * chunks for different bits of r2 never overlap.  Chroma needs half
         * as many bytes per plane as luma.
         */
        tst         r2, #8
        beq         1f
        vld1.u8     d17, [r1]!
  .if \interleaved
        vld1.u8     d21, [r3]!
  .else
        vld1.u32    d20[1], [r3]!
        vld1.u32    d21[1], [r4]!
  .endif

1:      tst         r2, #4
        beq         1f
        vld1.u32    d16[1], [r1]!
  .if \interleaved
        vld1.u32    d20[1], [r3]!
  .else
        vld1.u16    d20[1], [r3]!
        vld1.u16    d21[1], [r4]!
  .endif
1:      tst         r2, #2
        beq         1f
        vld1.u16    d16[1], [r1]!
  .if \interleaved
        vld1.u16    d20[1], [r3]!
  .else
        vld1.u8     d20[1], [r3]!
        vld1.u8     d21[1], [r4]!
  .endif
1:      tst         r2, #1
        beq         1f
        vld1.u8     d16[1], [r1]!
  .if \interleaved
        vld1.u16    d20[0], [r3]!
  .else
        vld1.u8     d20[0], [r3]!
        vld1.u8     d21[0], [r4]!
  .endif

        /* One small impediment in the process above is that some of the load
         * operations can't perform byte-wise structure deinterleaving at the
         * same time as loading only part of a register.  So the data is loaded
         * linearly and unpacked manually at this point if necessary.
         */
1:      vuzp.8      d16, d17
  .if \interleaved
        vuzp.8      d20, d21
    .if \swapuv
        vswp        d20, d21
    .endif
  .endif

        \kernel

        /* As above but with the output; structured stores for partial vectors
         * aren't available, so the data is re-packed first and stored linearly.
         * After the four zips q0-q3 hold RGBA for pixel slots 0-3, 4-7, 8-11
         * and 12-15 respectively.
         */
        vzip.8  q0, q2
        vzip.8  q1, q3
        vzip.8  q0, q1
        vzip.8  q2, q3

        /* Store each chunk from the slot that its input was loaded into. */
1:      tst         r2, #8
        beq         1f
        vst1.u8     {d4,d5,d6,d7}, [r0]!

1:      tst         r2, #4
        beq         1f
        vst1.u8     {d2,d3}, [r0]!
1:      tst         r2, #2
        beq         1f
        vst1.u8     d1, [r0]!
1:      tst         r2, #1
        beq         2f
        vst1.u32    d0[1], [r0]!
2:
.endm


/*  void rsdIntrinsicYuv2_K(
 *          void *out,          // r0
 *          void const *yin,    // r1
 *          void const *uin,    // r2
 *          void const *vin,    // r3
 *          size_t xstart,      // [sp]
 *          size_t xend);       // [sp+#4]
 *
 * Converts pixels [xstart, xend) of one line with U and V in separate planes.
 * The output pointer is advanced by four bytes per pixel, the Y pointer by
 * one byte per pixel and the U and V pointers by one byte per two pixels.
 *
 * NOTE(review): unlike the interleaved entry points, xstart is not rounded
 * down to an even value here, so an odd xstart would pair luma and chroma
 * samples differently -- confirm that callers only pass an even xstart.
 */
ENTRY(rsdIntrinsicYuv2_K)
        push        {r4,r5}
        ldr         r5, [sp, #8]        // xstart (above the two pushed words)
        mov         r4, r3              // r4 = vin
        mov         r3, r2              // r3 = uin
        ldr         r2, [sp, #12]       // xend

        add         r0, r5, LSL #2      // out += xstart * 4
        add         r1, r5              // yin += xstart
        add         r3, r5, LSR #1      // uin += xstart / 2
        add         r4, r5, LSR #1      // vin += xstart / 2
        sub         r2, r5              // r2 = xend - xstart = pixel count

        vpush       {d8-d15}            // the kernel uses q4-q7 as scratch

        wrap_line yuvkern, 0

        vpop        {d8-d15}
        pop         {r4,r5}
        bx lr
END(rsdIntrinsicYuv2_K)

/*  void rsdIntrinsicYuv_K(
 *          void *out,          // r0
 *          void const *yin,    // r1
 *          void const *uvin,   // r2
 *          size_t xstart,      // r3
 *          size_t xend);       // [sp]
 *
 * Converts one line with byte-interleaved chroma in which the even bytes are
 * V and the odd bytes are U (swapuv=1).  xstart is rounded down to an even
 * value first, so conversion starts at pixel (xstart & ~1) and runs up to
 * xend.
 */
ENTRY(rsdIntrinsicYuv_K)
        push        {r4,r5}
        bic         r4, r3, #1          // r4 = xstart rounded down to even
        add         r3, r2, r4          // r3 = uvin + r4 (two bytes per pixel pair)
        ldr         r2, [sp, #8]        // xend (above the two pushed words)

        add         r0, r4, LSL #2      // out += r4 * 4
        add         r1, r4              // yin += r4
        sub         r2, r4              // r2 = pixel count

        vpush       {d8-d15}            // the kernel uses q4-q7 as scratch

        wrap_line yuvkern, 1, 1

        vpop        {d8-d15}
        pop         {r4,r5}
        bx lr
END(rsdIntrinsicYuv_K)

/*  void rsdIntrinsicYuvR_K(
 *          void *out,          // r0
 *          void const *yin,    // r1
 *          void const *uvin,   // r2
 *          size_t xstart,      // r3
 *          size_t xend);       // [sp]
 *
 * Converts one line with byte-interleaved chroma in which the even bytes are
 * U and the odd bytes are V (swapuv left at 0).  xstart is rounded down to an
 * even value first, so conversion starts at pixel (xstart & ~1) and runs up
 * to xend.
 */
ENTRY(rsdIntrinsicYuvR_K)
        push        {r4,r5}
        bic         r4, r3, #1          // r4 = xstart rounded down to even
        add         r3, r2, r4          // r3 = uvin + r4 (two bytes per pixel pair)
        ldr         r2, [sp, #8]        // xend (above the two pushed words)

        add         r0, r4, LSL #2      // out += r4 * 4
        add         r1, r4              // yin += r4
        sub         r2, r4              // r2 = pixel count

        vpush       {d8-d15}            // the kernel uses q4-q7 as scratch

        wrap_line yuvkern, 1

        vpop        {d8-d15}
        pop         {r4,r5}
        bx lr
END(rsdIntrinsicYuvR_K)
299