/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f): switch to .text, align (the .align operand is a power-of-two
 * exponent under GNU as for AArch64, so 4 means 16 bytes), export f as a
 * global symbol of type function, and open its label.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
/* END(f): record the size of f as the distance from its label to here. */
#define END(f) .size f, .-f;


/* lanepair: trilinear table lookup for two pixels at once.
 *
 * Parameters:
 *   dst        destination for the 8 packed result bytes.  When dst is
 *              v20.16b or v21.16b the bytes go to the upper half of the
 *              register (uqrshrn2); any other operand receives them in the
 *              lower half (uqrshrn).
 *   src0,src1  32-bit vector elements holding the byte offset of each pixel
 *              in the table; they are sign-extended and added to x3.
 *   xr0,xr1    halfword vector elements: X interpolation weights.
 *   yr0,yr1    byte vector elements: Y interpolation weights.
 *   zr0,zr1    halfword vector elements: Z interpolation weights.
 *
 * Implicit inputs:
 *   x3 = table base, x4 = Y pitch, x5 = Z pitch (x4 and x5 are used as full
 *   64-bit post-index offsets), v5 = interleave partner for zip1 (expected
 *   to be all zeros so that zip1 yields value << 8 in each halfword).
 *
 * Clobbers: x6, x7, v8-v19.  The instruction order is kept exactly as
 * written; v8/v9 and v16-v19 are reused between stages.
 */
.macro lanepair dst, src0, src1, xr0, xr1, yr0, yr1, zr0, zr1

            /* x6/x7 = table base + sign-extended offset of each pixel */
            smov        x6, \src0
            smov        x7, \src1

            add         x6, x6, x3
            add         x7, x7, x3

            /* Each 8-byte load fetches two adjacent 4-byte entries (x, x+1).
             * v16/v17 = row y, then step by the Y pitch.
             */
            ld1         {v16.2s}, [x6], x4
            ld1         {v17.2s}, [x7], x4

            /* v18/v19 = row y+1, then step by the Z pitch */
            ld1         {v18.2s}, [x6], x5
            ld1         {v19.2s}, [x7], x5

            dup         v8.8b, \yr0
            dup         v9.8b, \yr1
            /* Y interpolate, front, lanes 0 and 1 -> v12 and v13
             * result = (a << 8) - a * yr + b * yr, as 16-bit values
             */
            zip1        v12.16b, v5.16b, v16.16b
            zip1        v13.16b, v5.16b, v17.16b
            umlsl       v12.8h, v16.8b, v8.8b
            umlsl       v13.8h, v17.8b, v9.8b
            umlal       v12.8h, v18.8b, v8.8b
            umlal       v13.8h, v19.8b, v9.8b

            /* x6/x7 now point at row y+1 of the next Z plane */
            ld1         {v18.2s}, [x6]
            ld1         {v19.2s}, [x7]

            /* step back by the Y pitch to row y of the next Z plane */
            sub         x6, x6, x4
            sub         x7, x7, x4

            ld1         {v16.2s}, [x6]
            ld1         {v17.2s}, [x7]

            /* Y interpolate, rear, lanes 0 and 1 -> v14 and v15 */
            zip1        v14.16b, v5.16b, v16.16b
            zip1        v15.16b, v5.16b, v17.16b
            umlsl       v14.8h, v16.8b, v8.8b
            umlsl       v15.8h, v17.8b, v9.8b
            umlal       v14.8h, v18.8b, v8.8b
            umlal       v15.8h, v19.8b, v9.8b

            /* Z interpolate, lane 0 v12/v14 -> v10
             * 32-bit: (front << 8) - front * zr + rear * zr, rounded >> 8
             */
            ushll       v8.4s, v12.4h, #8
            ushll2      v9.4s, v12.8h, #8
            umlsl       v8.4s, v12.4h, \zr0
            umlsl2      v9.4s, v12.8h, \zr0
            umlal       v8.4s, v14.4h, \zr0
            umlal2      v9.4s, v14.8h, \zr0
            rshrn       v10.4h, v8.4s, #8
            rshrn2      v10.8h, v9.4s, #8

            /* Z interpolate, lane 1 v13/v15 -> v11 */
            ushll       v8.4s, v13.4h, #8
            ushll2      v9.4s, v13.8h, #8
            umlsl       v8.4s, v13.4h, \zr1
            umlsl2      v9.4s, v13.8h, \zr1
            umlal       v8.4s, v15.4h, \zr1
            umlal2      v9.4s, v15.8h, \zr1
            rshrn       v11.4h, v8.4s, #8
            rshrn2      v11.8h, v9.4s, #8

            /* X interpolate, lanes 0 and 1 v10,v11 -> v14
             * The low four halfwords are the entry at x, the high four the
             * entry at x+1: (lo << 8) - lo * xr + hi * xr, truncated >> 8.
             */
            ushll       v8.4s, v10.4h, #8
            ushll       v9.4s, v11.4h, #8
            umlsl       v8.4s, v10.4h, \xr0
            umlsl       v9.4s, v11.4h, \xr1
            umlal2      v8.4s, v10.8h, \xr0
            umlal2      v9.4s, v11.8h, \xr1
            shrn        v14.4h, v8.4s, #8
            shrn2       v14.8h, v9.4s, #8

            /* pack both pixels (8 bytes, saturating, rounded >> 8) -> dst */
.ifc \dst, v20.16b
            uqrshrn2    \dst, v14.8h, #8
.else ; .ifc \dst, v21.16b
            uqrshrn2    \dst, v14.8h, #8
.else
            uqrshrn     \dst, v14.8h, #8
.endif ; .endif
.endm

/* void rsdIntrinsic3DLUT_K(
 *          void *dst,          // x0
 *          void const *in,     // x1
 *          size_t count,       // x2
 *          void const *lut,    // x3
 *          int32_t pitchy,     // w4
 *          int32_t pitchz,     // w5
 *          int dimx,           // w6
 *          int dimy,           // w7
 *          int dimz);          // [sp]
 *
 * Processes count 4-byte pixels from in to dst, eight per iteration.  The
 * first three bytes of each pixel are scaled by dimx/dimy/dimz (only the low
 * 16 bits of each are used) to form a table coordinate; the table at lut is
 * sampled with the lanepair macro and the fourth byte of each pixel is copied
 * through unchanged.  A final group of fewer than eight pixels is loaded and
 * stored lane by lane so that no memory beyond count pixels is touched in
 * either buffer.
 *
 * d8-d15 are saved and restored; x6, x7, w8, v0-v7 and v16-v23 are used as
 * scratch.
 */
ENTRY(rsdIntrinsic3DLUT_K)
            ldr         w8, [sp]                /* dimz, read before sp moves */
            /* pitchy and pitchz are 32-bit arguments, so the upper halves of
             * x4 and x5 are unspecified on entry.  lanepair uses x4/x5 as
             * 64-bit address offsets, so widen them here.
             */
            sxtw        x4, w4
            sxtw        x5, w5
            stp         d8, d9, [sp, #-64]!
            stp         d10, d11, [sp, #16]
            stp         d12, d13, [sp, #32]
            stp         d14, d15, [sp, #48]
            /* v4.h[0..2] = dimx, dimy, dimz; v4.s[2] = pitchy; v4.s[3] = pitchz */
            movi        v4.8b, #1
            ins         v4.h[0], w6
            ins         v4.h[1], w7
            ins         v4.h[2], w8
            ins         v4.s[2], w4
            ins         v4.s[3], w5
            movi        v5.16b, #0              /* zero vector used by lanepair */

            /* x2 = count - 8; at least eight pixels -> full-width loop */
            subs        x2, x2, #8
            bge         2f
            cmn         x2, #8    // same as cmp x2, #-8
            ble         9f                      /* count <= 0: nothing to do */
            b           4f                      /* 1..7 pixels: partial load */

            .align 6
1:          st4         {v20.8b,v21.8b,v22.8b,v23.8b}, [x0], #32
/* x0  = dst
 * x1  = src
 * x2  = count
 * x3  = lut
 * x4  = pitchy
 * x5  = pitchz
 * x6 = offset0
 * x7 = offset1
 */
2:          ld4         {v0.8b-v3.8b}, [x1], #32
/* v0,v1,v2,v3 source data
 * v4 dimensions and pitches
 */
3:          uxtl        v0.8h, v0.8b
            uxtl        v1.8h, v1.8b
            uxtl        v2.8h, v2.8b
            mul         v0.8h, v0.8h, v4.h[0]
            mul         v1.8h, v1.8h, v4.h[1]
            mul         v2.8h, v2.8h, v4.h[2]

/* ursra below would be more accurate, but this can result in a "dim.0" case
 * (integer part at the table limit, fraction zero) where we try to read from
 * the limit of the array and the limit + 1 to interpolate, even though the
 * fractional component is zero.  Strictly this is correct, except for the
 * illegal access problem.
 */
            usra        v0.8h, v0.8h, #8
            usra        v1.8h, v1.8h, #8
            usra        v2.8h, v2.8h, #8

            /* split each 8.8 fixed-point coordinate into integer and fraction */
            ushr        v12.8h, v0.8h, #8
            ushr        v13.8h, v1.8h, #8
            ushr        v14.8h, v2.8h, #8
            bic         v0.8h, #0xff, LSL #8
            xtn         v1.8b, v1.8h
            bic         v2.8h, #0xff, LSL #8

/* v0.8h,v1.8b,v2.8h fractional offset
 * v12.8h,v13.8h,v14.8h integer offset
 */

            /* byte offset = x * 4 + y * pitchy + z * pitchz */
            ushll       v6.4s, v12.4h, #2
            ushll2      v7.4s, v12.8h, #2
            uxtl        v8.4s, v13.4h
            uxtl2       v9.4s, v13.8h
            uxtl        v10.4s, v14.4h
            uxtl2       v11.4s, v14.8h
            mla         v6.4s, v8.4s,  v4.s[2]
            mla         v7.4s, v9.4s,  v4.s[2]
            mla         v6.4s, v10.4s, v4.s[3]
            mla         v7.4s, v11.4s, v4.s[3]

/* v6,v7 list of table offsets */

        /* lanes 0 and 1 */
            lanepair    dst=v20.8b,  src0=v6.s[0], src1=v6.s[1], xr0=v0.h[0], xr1=v0.h[1], yr0=v1.b[0], yr1=v1.b[1], zr0=v2.h[0], zr1=v2.h[1]

        /* lanes 2 and 3 */
            lanepair    dst=v20.16b, src0=v6.s[2], src1=v6.s[3], xr0=v0.h[2], xr1=v0.h[3], yr0=v1.b[2], yr1=v1.b[3], zr0=v2.h[2], zr1=v2.h[3]

        /* lanes 4 and 5 */
            lanepair    dst=v21.8b,  src0=v7.s[0], src1=v7.s[1], xr0=v0.h[4], xr1=v0.h[5], yr0=v1.b[4], yr1=v1.b[5], zr0=v2.h[4], zr1=v2.h[5]

        /* lanes 6 and 7 */
            lanepair    dst=v21.16b, src0=v7.s[2], src1=v7.s[3], xr0=v0.h[6], xr1=v0.h[7], yr0=v1.b[6], yr1=v1.b[7], zr0=v2.h[6], zr1=v2.h[7]

            /* v20/v21 hold eight interleaved 4-byte results; separate them
             * into one register per channel for st4 (v20, v21, v22).
             */
            uzp1        v6.16b, v20.16b, v21.16b
            uzp2        v7.16b, v20.16b, v21.16b
            uzp1        v20.16b, v6.16b, v7.16b
            uzp2        v22.16b, v6.16b, v7.16b
            mov         v21.d[0], v20.d[1]

            subs        x2, x2, #8
            mov         v23.8b, v3.8b           /* fourth channel passes through */

            bge         1b                      /* store, then another full group */

            cmn         x2, #8    // same as cmp x2, #-8
            blt         1f                      /* this group was partial */

            /* st4 leaves the flags from cmn intact for the beq below */
            st4         {v20.8b,v21.8b,v22.8b,v23.8b}, [x0], #32
            beq         9f                      /* no pixels left */

            /* fill the vector with a safe value, then load the remaining
             * pixels lane by lane: bit 2 of x2 -> lanes 0-3, bit 1 -> lanes
             * 4-5, bit 0 -> lane 6.
             */
4:          ld4r        {v0.8b-v3.8b}, [x1]
            tbz         x2, #2, 2f
            ld4         {v0.b-v3.b}[0], [x1], #4
            ld4         {v0.b-v3.b}[1], [x1], #4
            ld4         {v0.b-v3.b}[2], [x1], #4
            ld4         {v0.b-v3.b}[3], [x1], #4
2:          tbz         x2, #1, 2f
            ld4         {v0.b-v3.b}[4], [x1], #4
            ld4         {v0.b-v3.b}[5], [x1], #4
2:          tbz         x2, #0, 2f
            ld4         {v0.b-v3.b}[6], [x1], #4
2:          b           3b

            /* partial store, same lane layout as the partial load */
1:          tst         x2, #4
            beq         2f
            st4         {v20.b-v23.b}[0], [x0], #4
            st4         {v20.b-v23.b}[1], [x0], #4
            st4         {v20.b-v23.b}[2], [x0], #4
            st4         {v20.b-v23.b}[3], [x0], #4
2:          tst         x2, #2
            beq         2f
            st4         {v20.b-v23.b}[4], [x0], #4
            st4         {v20.b-v23.b}[5], [x0], #4
2:          tst         x2, #1
            beq         9f
            st4         {v20.b-v23.b}[6], [x0], #4

9:          ldp         d14, d15, [sp, #48]
            ldp         d12, d13, [sp, #32]
            ldp         d10, d11, [sp, #16]
            ldp         d8, d9, [sp], #64
            ret
END(rsdIntrinsic3DLUT_K)