/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f) switches to the .text section, applies ".align 4", and opens a
 * global symbol f typed as a function.  END(f) records the size of f as the
 * distance from its label to the current location.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
#define END(f) .size f, .-f;

/* Perform the actual YuvToRGB conversion in a macro, from register to
 * register.  This macro will be called from within several different wrapper
 * variants for different data layouts.  Y data starts with the even and odd
 * bytes split into v8 and v9 respectively; all 16 lanes of each register are
 * converted, so one expansion handles 32 pixels.  U and V are in v10 and v11
 * by default; the regu and regv arguments select other registers.  Working
 * constants are pre-loaded into v24-v31, and v3 and v7 are pre-loaded with a
 * constant 0xff alpha channel (the macro itself never writes v3 or v7).
 *
 * As implemented below, per pixel (16-bit intermediates, unsigned saturation):
 *   r = narrow(satsub(((y * 149 + (v >> 1) + v * 204) >> 1), v29), 6)
 *   g = narrow(satsub(satadd(y * 149, v30), u * 50 + v * 104),   7)
 *   b = narrow(satsub(((y * 149 + (u << 2) + u * 254) >> 1), v31), 6)
 * where narrow(x, n) is a rounding right shift by n saturated to 8 bits.
 *
 * On exit v0, v1, v2 hold R, G, B for pixels 0-15 in pixel order, and
 * v4, v5, v6 hold R, G, B for pixels 16-31.
 *
 * The complicated arithmetic is the result of refactoring the original
 * equations to avoid 16-bit overflow without losing any precision.
 */
.macro yuvkern, regu=v10, regv=v11
        /* v0   out R_lo / even R_lo accumulator
         * v1   out G_lo / even G_lo accumulator
         * v2   out B_lo / even B_lo accumulator
         * v3   out A_lo / const 0xff
         * v4   out R_hi / even R_hi accumulator
         * v5   out G_hi / even G_hi accumulator
         * v6   out B_hi / even B_hi accumulator
         * v7   out A_hi / const 0xff
         * v8   even Y   / G_lo chroma tmp (u * 50 + v * 104)
         * v9   odd Y    / G_hi chroma tmp (u_hi * 50 + v_hi * 104)
         * \regu in U
         * \regv in V
         * v12  R_lo chroma tmp (v * 204)
         * v13  B_lo chroma tmp (u * 254)
         * v14  R_hi chroma tmp (v_hi * 204)
         * v15  B_hi chroma tmp (u_hi * 254)
         * v16  odd R_lo accumulator
         * v17  odd G_lo accumulator
         * v18  odd B_lo accumulator
         * v19  multiplier extra bits: v >> 1 (all lanes), then low half of u << 2
         * v20  odd R_hi accumulator
         * v21  odd G_hi accumulator
         * v22  odd B_hi accumulator
         * v23  multiplier extra bits: high half of u << 2
         * v24  constant 149
         * v25  constant 50
         * v26  constant 104
         * v27  constant 204
         * v28  constant 254
         * v29  constant ((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
         * v30  constant ((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
         * v31  constant ((16 * 149 + (128 << 2) + 128 * 254) >> 1)
         */

        /* Luma term, shared by all three channels.  The G accumulators are
         * also used as the starting point for R and B below.
         */
        umull       v1.8h,  v8.8b,  v24.8b      // g0 = y0 * 149
        umull       v17.8h, v9.8b,  v24.8b      // g1 = y1 * 149
        umull2      v5.8h,  v8.16b, v24.16b     // g0_hi = y0_hi * 149
        umull2      v21.8h, v9.16b, v24.16b     // g1_hi = y1_hi * 149

        /* The Y inputs have been consumed, so v8/v9 are reused for the
         * green chroma term.
         */
        umull       v8.8h, \regu\().8b, v25.8b     // g2 = u * 50 + v * 104
        umlal       v8.8h, \regv\().8b, v26.8b
        umull2      v9.8h, \regu\().16b, v25.16b   // g2_hi = u_hi * 50 + v_hi * 104
        umlal2      v9.8h, \regv\().16b, v26.16b

        ushr        v19.16b, \regv\().16b, #1
        uaddw       v0.8h,  v1.8h,  v19.8b      // r0 = g0 + (v >> 1)
        uaddw       v16.8h, v17.8h, v19.8b      // r1 = g1 + (v >> 1)

        uaddw2      v4.8h,  v5.8h,  v19.16b     // r0_hi = g0_hi + (v_hi >> 1)
        uaddw2      v20.8h, v21.8h, v19.16b     // r1_hi = g1_hi + (v_hi >> 1)

        ushll       v19.8h, \regu\().8b,  #2
        ushll2      v23.8h, \regu\().16b, #2
        add         v2.8h,  v1.8h,  v19.8h      // b0 = g0 + (u << 2)
        add         v18.8h, v17.8h, v19.8h      // b1 = g1 + (u << 2)

        add         v6.8h,  v5.8h,  v23.8h      // b0_hi = g0_hi + (u_hi << 2)
        add         v22.8h, v21.8h, v23.8h      // b1_hi = g1_hi + (u_hi << 2)

        umull       v12.8h, \regv\().8b, v27.8b    // r2 = v * 204
        umull       v13.8h, \regu\().8b, v28.8b    // b2 = u * 254

        umull2      v14.8h, \regv\().16b, v27.16b  // r2_hi = v_hi * 204
        umull2      v15.8h, \regu\().16b, v28.16b  // b2_hi = u_hi * 254

        /* Halving adds keep the R and B sums inside 16 bits. */
        uhadd       v0.8h,  v0.8h,  v12.8h      // r0 = (r0 + r2) >> 1
        uhadd       v16.8h, v16.8h, v12.8h      // r1 = (r1 + r2) >> 1
        uqadd       v1.8h,  v1.8h,  v30.8h      // g0 = satu16(g0 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uqadd       v17.8h, v17.8h, v30.8h      // g1 = satu16(g1 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uhadd       v2.8h,  v2.8h,  v13.8h      // b0 = (b0 + b2) >> 1
        uhadd       v18.8h, v18.8h, v13.8h      // b1 = (b1 + b2) >> 1

        uhadd       v4.8h,  v4.8h,  v14.8h      // r0_hi = (r0_hi + r2_hi) >> 1
        uhadd       v20.8h, v20.8h, v14.8h      // r1_hi = (r1_hi + r2_hi) >> 1
        uqadd       v5.8h,  v5.8h,  v30.8h      // g0_hi = satu16(g0_hi + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uqadd       v21.8h, v21.8h, v30.8h      // g1_hi = satu16(g1_hi + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        uhadd       v6.8h,  v6.8h,  v15.8h      // b0_hi = (b0_hi + b2_hi) >> 1
        uhadd       v22.8h, v22.8h, v15.8h      // b1_hi = (b1_hi + b2_hi) >> 1

        /* Saturating subtracts clamp negative results to zero. */
        uqsub       v0.8h,  v0.8h,  v29.8h      // r0 = satu16(r0 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v16.8h, v16.8h, v29.8h      // r1 = satu16(r1 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v1.8h,  v1.8h,  v8.8h       // g0 = satu16(g0 - g2)
        uqsub       v17.8h, v17.8h, v8.8h       // g1 = satu16(g1 - g2)
        uqsub       v2.8h,  v2.8h,  v31.8h      // b0 = satu16(b0 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
        uqsub       v18.8h, v18.8h, v31.8h      // b1 = satu16(b1 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)

        uqsub       v4.8h,  v4.8h,  v29.8h      // r0_hi = satu16(r0_hi - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v20.8h, v20.8h, v29.8h      // r1_hi = satu16(r1_hi - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        uqsub       v5.8h,  v5.8h,  v9.8h       // g0_hi = satu16(g0_hi - g2_hi)
        uqsub       v21.8h, v21.8h, v9.8h       // g1_hi = satu16(g1_hi - g2_hi)
        uqsub       v6.8h,  v6.8h,  v31.8h      // b0_hi = satu16(b0_hi - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
        uqsub       v22.8h, v22.8h, v31.8h      // b1_hi = satu16(b1_hi - (16 * 149 + (128 << 2) + 128 * 254) >> 1)

        /* Round, shift and saturate down to 8 bits.  R and B were already
         * halved above, so they shift by 6 where G shifts by 7.
         */
        uqrshrn     v0.8b,  v0.8h,  #6
        uqrshrn     v16.8b, v16.8h, #6
        uqrshrn     v1.8b,  v1.8h,  #7
        uqrshrn     v17.8b, v17.8h, #7
        uqrshrn     v2.8b,  v2.8h,  #6
        uqrshrn     v18.8b, v18.8h, #6

        uqrshrn     v4.8b,  v4.8h,  #6
        uqrshrn     v20.8b, v20.8h, #6
        uqrshrn     v5.8b,  v5.8h,  #7
        uqrshrn     v21.8b, v21.8h, #7
        uqrshrn     v6.8b,  v6.8h,  #6
        uqrshrn     v22.8b, v22.8h, #6

        /* Re-interleave the even and odd pixels back into pixel order. */
        zip1        v0.16b, v0.16b, v16.16b
        zip1        v1.16b, v1.16b, v17.16b
        zip1        v2.16b, v2.16b, v18.16b

        zip1        v4.16b, v4.16b, v20.16b
        zip1        v5.16b, v5.16b, v21.16b
        zip1        v6.16b, v6.16b, v22.16b
.endm

/* Define the wrapper code which will load and store the data, iterate the
 * correct number of times, and safely handle the remainder at the end of the
 * loop.  Some sections of code are switched out depending on the data packing
 * being handled.
 *
 * Macro arguments:
 *   kernel       name of the conversion macro to expand (e.g. yuvkern)
 *   interleaved  non-zero: chroma is one byte-interleaved stream read from
 *                x3; zero: two separate streams read from x3 and x4
 *   swapuv       non-zero: expand the kernel with regu=v11, regv=v10 instead
 *                of its defaults
 *
 * Registers on entry:
 *   x0  output pointer (4 bytes are written per pixel)
 *   x1  Y pointer (1 byte per pixel)
 *   x2  number of pixels to convert
 *   x3  first chroma pointer
 *   x4  second chroma pointer (only read when interleaved is zero)
 * x5 is used as scratch, x0-x4 are modified, and the vector registers named
 * in the kernel (plus v3, v7 and v24-v31) are overwritten.
 */
.macro wrap_line kernel, interleaved=0, swapuv=0
        movi        v24.16b, #149
        movi        v25.16b, #50
        movi        v26.16b, #104
        movi        v27.16b, #204
        movi        v28.16b, #254
        mov         w5, #((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
        dup         v29.8h, w5
        mov         w5, #((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
        dup         v30.8h, w5
        mov         w5, #((16 * 149 + (128 << 2) + 128 * 254) >> 1)
        dup         v31.8h, w5

        movi        v3.16b, #0xff
        movi        v7.16b, #0xff

        /* x2 is kept biased by -32 while the main loop runs, so the carry
         * flag says whether a full block of 32 pixels remains.
         */
        subs        x2, x2, #32
        bhs         1f
        b           2f

        .align 4
1:      ld2         {v8.16b,v9.16b}, [x1], #32
  .if \interleaved
        ld2         {v10.16b,v11.16b}, [x3], #32
  .else
        ld1         {v10.16b}, [x3], #16
        ld1         {v11.16b}, [x4], #16
  .endif

  .if \swapuv
        \kernel regu=v11, regv=v10
  .else
        \kernel
  .endif

        subs        x2, x2, #32

        st4         {v0.16b - v3.16b}, [x0], #64
        st4         {v4.16b - v7.16b}, [x0], #64

        bhs         1b

        /* Undo the bias; x2 is now the number of leftover pixels (0-31). */
2:      adds        x2, x2, #32
        beq         2f

        /* To handle the tail portion of the data (fewer than 32 pixels)
         * load small power-of-two chunks into working registers.  It
         * doesn't matter where they end up in the register; the same process
         * will store them back out using the same positions and the
         * interaction between neighbouring pixels is constrained to odd
         * boundaries where the load operations don't interfere.
         */
        movi        v8.8b, #0
        movi        v9.8b, #0
        movi        v10.8b, #0
        movi        v11.8b, #0

        /* Each set bit of x2 selects one chunk: 16, 8, 4, 2, then 1 pixel. */
        tbz         x2, #4, 1f
        ld1         {v9.16b}, [x1], #16
  .if \interleaved
        ld1         {v11.16b}, [x3], #16
  .else
        ld1         {v10.d}[1], [x3], #8
        ld1         {v11.d}[1], [x4], #8
  .endif
1:      tbz         x2, #3, 1f
        ld1         {v8.d}[1], [x1], #8
  .if \interleaved
        ld1         {v10.d}[1], [x3], #8
  .else
        ld1         {v10.s}[1], [x3], #4
        ld1         {v11.s}[1], [x4], #4
  .endif
1:      tbz         x2, #2, 1f
        ld1         {v8.s}[1], [x1], #4
  .if \interleaved
        ld1         {v10.s}[1], [x3], #4
  .else
        ld1         {v10.h}[1], [x3], #2
        ld1         {v11.h}[1], [x4], #2
  .endif
1:      tbz         x2, #1, 1f
        ld1         {v8.h}[1], [x1], #2
  .if \interleaved
        ld1         {v10.h}[1], [x3], #2
  .else
        ld1         {v10.b}[1], [x3], #1
        ld1         {v11.b}[1], [x4], #1
  .endif
1:      tbz         x2, #0, 1f
        ld1         {v8.b}[1], [x1], #1
  .if \interleaved
        ld1         {v10.h}[0], [x3], #2
  .else
        ld1         {v10.b}[0], [x3], #1
        ld1         {v11.b}[0], [x4], #1
  .endif

        /* One small impediment in the process above is that some of the load
         * operations can't perform byte-wise structure deinterleaving at the
         * same time as loading only part of a register.  So the data is loaded
         * linearly and unpacked manually at this point if necessary.
         */
1:      mov         v12.16b, v8.16b
        uzp1        v8.16b, v12.16b, v9.16b
        uzp2        v9.16b, v12.16b, v9.16b
  .if \interleaved
        mov         v12.16b, v10.16b
        uzp1        v10.16b, v12.16b, v11.16b
        uzp2        v11.16b, v12.16b, v11.16b
  .endif

  .if \swapuv
        \kernel regu=v11, regv=v10
  .else
        \kernel
  .endif

        /* As above but with the output; structured stores for partial vectors
         * aren't available, so the data is re-packed first and stored linearly.
         * After these zips v0, v1, v2 and v3 hold packed 4-byte pixels 0-3,
         * 4-7, 8-11 and 12-15 respectively.
         */
        zip1        v16.16b, v0.16b, v2.16b
        zip2        v18.16b, v0.16b, v2.16b
        zip1        v17.16b, v1.16b, v3.16b
        zip2        v19.16b, v1.16b, v3.16b
        zip1        v0.16b, v16.16b, v17.16b
        zip2        v1.16b, v16.16b, v17.16b
        zip1        v2.16b, v18.16b, v19.16b
        zip2        v3.16b, v18.16b, v19.16b

        /* Luckily v4-v7 don't need to be re-packed: they are only ever stored
         * as a complete set of four, so st4 can interleave them directly. */

        /* Store the chunks in the same order they were loaded. */
        tbz         x2, #4, 1f
        st4         {v4.16b - v7.16b}, [x0], #64
1:      tbz         x2, #3, 1f
        st1         {v2.16b,v3.16b}, [x0], #32
1:      tbz         x2, #2, 1f
        st1         {v1.16b}, [x0], #16
1:      tbz         x2, #1, 1f
        st1         {v0.d}[1], [x0], #8
1:      tbz         x2, #0, 2f
        st1         {v0.s}[1], [x0], #4
2:
.endm


/*  void rsdIntrinsicYuv2_K(
 *          void *out,          // x0
 *          void const *yin,    // x1
 *          void const *uin,    // x2
 *          void const *vin,    // x3
 *          size_t xstart,      // x4
 *          size_t xend);       // x5
 *
 * Converts one row whose U and V samples live in two separate streams, each
 * with one sample per pair of pixels.  Pixels [xstart & ~1, xend) are
 * converted: the start is rounded down to an even pixel so that every Y pair
 * lines up with its chroma sample, matching rsdIntrinsicYuv_K and
 * rsdIntrinsicYuvR_K.  All four pointers are advanced by the rounded start
 * here, so they are presumably row-base pointers -- confirm against the
 * caller.
 */
ENTRY(rsdIntrinsicYuv2_K)
        bic         x6, x4, #1              // x6 = xstart rounded down to even
        add         x0, x0, x6, LSL #2      // out: 4 bytes per pixel
        add         x1, x1, x6              // yin: 1 byte per pixel
        add         x4, x3, x6, LSR #1      // x4 = vin + start / 2
        add         x3, x2, x6, LSR #1      // x3 = uin + start / 2
        sub         x2, x5, x6              // x2 = number of pixels to convert

        /* Save the low 64 bits of v8-v15 (callee-saved under AAPCS64); the
         * macros below overwrite them.  x6 addresses the upper half of the
         * 64-byte save area.
         */
        sub         x6, sp, #32
        sub         sp, sp, #64
        st1         {v8.1d - v11.1d}, [sp]
        st1         {v12.1d - v15.1d}, [x6]

        wrap_line yuvkern, 0

        ld1         {v8.1d - v11.1d}, [sp], #32
        ld1         {v12.1d - v15.1d}, [sp], #32
        ret
END(rsdIntrinsicYuv2_K)

/*  void rsdIntrinsicYuv_K(
 *          void *out,          // x0
 *          void const *yin,    // x1
 *          void const *uvin,   // x2
 *          size_t xstart,      // x3
 *          size_t xend);       // x4
 *
 * Converts one row whose chroma is a single byte-interleaved stream.  The
 * kernel is expanded with U and V swapped, so the first byte of each chroma
 * pair is used as V and the second as U.  Pixels [xstart & ~1, xend) are
 * converted; the pointers are advanced by the rounded start here, so they
 * are presumably row-base pointers -- confirm against the caller.
 */
ENTRY(rsdIntrinsicYuv_K)
        bic         x5, x3, #1              // x5 = xstart rounded down to even
        add         x0, x0, x5, LSL #2      // out: 4 bytes per pixel
        add         x1, x1, x5              // yin: 1 byte per pixel
        add         x3, x2, x5              // chroma: 2 bytes per 2 pixels
        sub         x2, x4, x5              // x2 = number of pixels to convert

        /* Save the low 64 bits of v8-v15 (callee-saved under AAPCS64); the
         * macros below overwrite them.  x5 addresses the upper half of the
         * 64-byte save area.
         */
        sub         x5, sp, #32
        sub         sp, sp, #64
        st1         {v8.1d - v11.1d}, [sp]
        st1         {v12.1d - v15.1d}, [x5]

        wrap_line yuvkern, 1, 1

        ld1         {v8.1d - v11.1d}, [sp], #32
        ld1         {v12.1d - v15.1d}, [sp], #32
        ret
END(rsdIntrinsicYuv_K)

/*  void rsdIntrinsicYuvR_K(
 *          void *out,          // x0
 *          void const *yin,    // x1
 *          void const *uvin,   // x2
 *          size_t xstart,      // x3
 *          size_t xend);       // x4
 *
 * Converts one row whose chroma is a single byte-interleaved stream.  The
 * kernel is expanded with its default register assignment, so the first
 * byte of each chroma pair is used as U and the second as V.  Pixels
 * [xstart & ~1, xend) are converted; the pointers are advanced by the
 * rounded start here, so they are presumably row-base pointers -- confirm
 * against the caller.
 */
ENTRY(rsdIntrinsicYuvR_K)
        bic         x5, x3, #1              // x5 = xstart rounded down to even
        add         x0, x0, x5, LSL #2      // out: 4 bytes per pixel
        add         x1, x1, x5              // yin: 1 byte per pixel
        add         x3, x2, x5              // chroma: 2 bytes per 2 pixels
        sub         x2, x4, x5              // x2 = number of pixels to convert

        /* Save the low 64 bits of v8-v15 (callee-saved under AAPCS64); the
         * macros below overwrite them.  x5 addresses the upper half of the
         * 64-byte save area.
         */
        sub         x5, sp, #32
        sub         sp, sp, #64
        st1         {v8.1d - v11.1d}, [sp]
        st1         {v12.1d - v15.1d}, [x5]

        wrap_line yuvkern, 1

        ld1         {v8.1d - v11.1d}, [sp], #32
        ld1         {v12.1d - v15.1d}, [sp], #32
        ret
END(rsdIntrinsicYuvR_K)
