/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f): open a global function symbol f.
 *   Switches to .text, aligns the location counter (on AArch64 GAS the
 *   operand of .align is a power-of-two exponent, so 4 means 16 bytes),
 *   exports f, marks it as a function symbol and defines the label.
 *   The ';' characters are statement separators, not comments.
 * END(f): record the size of f as the distance from f to the current location.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
#define END(f) .size f, .-f;


/* vmxx_f32 i, mask, opd, opa, opb
 *
 * Emit one float multiply term if bit `mask` of the term mask `i` is set,
 * and nothing at all otherwise.
 *   - If i also has any bit below `mask` set, accumulate into opd:
 *         fmla opd, opa, opb        (opd += opa * opb)
 *   - Otherwise this is the lowest selected term, so initialise opd:
 *         fmul opd, opa, opb        (opd  = opa * opb)
 *
 * Callers in this file invoke the terms in ascending mask order (1, 2, 4, 8),
 * which is what makes "a lower bit is set" equivalent to "opd already holds a
 * partial sum".  i and mask must be absolute expressions; both are wrapped in
 * parentheses so that compound arguments such as i^31 keep their meaning.
 */
.macro vmxx_f32 i, mask, opd, opa, opb
  .if (\i) & (\mask)
    .if (\i) & ((\mask) - 1)
        fmla            \opd, \opa, \opb
    .else
        fmul            \opd, \opa, \opb
    .endif
  .endif
.endm

/* vadd_f32 i, mask, opd, opa, opb, stupidsyntax1, stupidsyntax2
 *
 * Emit the additive term if bit `mask` of the term mask `i` is set, and
 * nothing at all otherwise.
 *   - If i also has any bit below `mask` set, add to the partial sum:
 *         fadd opd, opa, opb
 *   - Otherwise no multiply term was selected, so the result is just a copy:
 *         mov stupidsyntax1, stupidsyntax2
 *
 * stupidsyntax1/stupidsyntax2 are supplied by the caller as the destination
 * and addend registers spelled with the .16b arrangement (see the call sites,
 * e.g. v8.16b, v4.16b next to v8.4s, v4.4s).  The A64 vector MOV alias is
 * only defined for byte arrangements, hence the second spelling.
 *
 * i and mask must be absolute expressions; both are wrapped in parentheses so
 * that compound arguments such as i^31 keep their meaning.
 */
.macro vadd_f32 i, mask, opd, opa, opb, stupidsyntax1, stupidsyntax2
  .if (\i) & (\mask)
    .if (\i) & ((\mask) - 1)
        fadd            \opd, \opa, \opb
    .else
        mov             \stupidsyntax1, \stupidsyntax2
    .endif
  .endif
.endm

/* vmxx_s16 i, mask, opd, opa, opb
 *
 * Emit one widening signed 16-bit multiply term (lower half of opa) if bit
 * `mask` of the term mask `i` is set, and nothing at all otherwise.
 *   - If i has any bit below `mask` set, or has bit 16 set, accumulate:
 *         smlal opd, opa, opb
 *   - Otherwise initialise opd with the product:
 *         smull opd, opa, opb
 *
 * Bit 16 is included in the test because the integer kernels in this file
 * preload the accumulators with a dup when (i & 16) is set, so in that case
 * even the lowest multiply term has to accumulate.
 *
 * i and mask must be absolute expressions; both are wrapped in parentheses so
 * that compound arguments such as i^31 keep their meaning.
 */
.macro vmxx_s16 i, mask, opd, opa, opb
  .if (\i) & (\mask)
    .if (\i) & ((\mask) - 1 + 16)
        smlal           \opd, \opa, \opb
    .else
        smull           \opd, \opa, \opb
    .endif
  .endif
.endm

/* vmxx2_s16 i, mask, opd, opa, opb
 *
 * Upper-half counterpart of vmxx_s16: emits smlal2/smull2, which take their
 * 16-bit inputs from the upper half of opa (callers pass it as .8h).
 * Emits nothing unless bit `mask` of `i` is set; accumulates (smlal2) when i
 * has any bit below `mask` set or has bit 16 set, otherwise initialises opd
 * with the product (smull2).
 *
 * i and mask must be absolute expressions; both are wrapped in parentheses so
 * that compound arguments such as i^31 keep their meaning.
 */
.macro vmxx2_s16 i, mask, opd, opa, opb
  .if (\i) & (\mask)
    .if (\i) & ((\mask) - 1 + 16)
        smlal2          \opd, \opa, \opb
    .else
        smull2          \opd, \opa, \opb
    .endif
  .endif
.endm

/* Register roles for the routines below.  The stages are chained with
 * indirect branches: a load routine ends with "br x4", and each column
 * routine ends by branching to the register that holds the next stage.
 *
 * x0 = dst
 * x1 = src
 * x2 = count
 * x3 = params
 * x4 = column0_fn
 * x5 = column1_fn
 * x6 = column2_fn
 * x7 = column3_fn
 * x8 = store_fn
 * x9 = load_fn
 */
/* Per-column kernels, generated once for every term mask.
 *
 * Each value of i is a bit mask selecting the terms a column needs:
 *   bits 1, 2, 4, 8 - multiply by the first..fourth input component
 *   bit 16          - include the additive term
 * The list only enumerates 0..15.  The routines suffixed _n<i> are assembled
 * with the mask i^31 instead, which yields the values 16..31 (bit 16 set, low
 * four bits inverted); for example colormatrix_int_col0_n3 implements mask 28.
 *
 * Integer kernels, column c = 0..3:
 *   in   v12-v15                           input components, 8 x 16-bit lanes
 *        v0.h[c] v0.h[c+4] v1.h[c] v1.h[c+4]  coefficients for v12..v15
 *        v4.s[c]                           additive term, broadcast into the
 *                                          accumulators before the multiplies
 *   tmp  v6, v7                            32-bit accumulators, lanes 0-3 / 4-7
 *   out  v8+c                              accumulators narrowed by sqshrun #8
 *
 * Float kernels, column c = 0..3, work on two groups of four lanes:
 *   in   v12-v15 and v20-v23               input components, 4 x f32 each
 *        v0.s[c] v1.s[c] v2.s[c] v3.s[c]   coefficients
 *        v4+c                              additive term (whole vector)
 *   out  v8+c and v16+c
 *
 * Every kernel ends by branching to the next stage: column 0 to x5, column 1
 * to x6, column 2 to x7 and column 3 to x8.
 *
 * NOTE(review): for mask 0 the integer kernels emit no instruction that
 * writes v6/v7 before the sqshrun pair, and the float kernels emit only the
 * branch.  Presumably the code that fills x4-x7 accounts for this - confirm
 * against the dispatcher.
 */
.irp i, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

/* ---- integer, column 0: result in v8, continue at x5 ---- */
.align 6
colormatrix_int_col0_\i:
      .if \i & 16
            dup         v6.4s, v4.s[0]
            dup         v7.4s, v4.s[0]
      .endif
            vmxx_s16    \i, 1, v6.4s, v12.4h, v0.h[0]
            vmxx_s16    \i, 2, v6.4s, v13.4h, v0.h[4]
            vmxx_s16    \i, 4, v6.4s, v14.4h, v1.h[0]
            vmxx_s16    \i, 8, v6.4s, v15.4h, v1.h[4]
            vmxx2_s16   \i, 1, v7.4s, v12.8h, v0.h[0]
            vmxx2_s16   \i, 2, v7.4s, v13.8h, v0.h[4]
            vmxx2_s16   \i, 4, v7.4s, v14.8h, v1.h[0]
            vmxx2_s16   \i, 8, v7.4s, v15.8h, v1.h[4]
            sqshrun     v8.4h, v6.4s, #8
            sqshrun2    v8.8h, v7.4s, #8
            br          x5

colormatrix_int_col0_n\i:
      .if (\i^31) & 16
            dup         v6.4s, v4.s[0]
            dup         v7.4s, v4.s[0]
      .endif
            vmxx_s16    \i^31, 1, v6.4s, v12.4h, v0.h[0]
            vmxx_s16    \i^31, 2, v6.4s, v13.4h, v0.h[4]
            vmxx_s16    \i^31, 4, v6.4s, v14.4h, v1.h[0]
            vmxx_s16    \i^31, 8, v6.4s, v15.4h, v1.h[4]
            vmxx2_s16   \i^31, 1, v7.4s, v12.8h, v0.h[0]
            vmxx2_s16   \i^31, 2, v7.4s, v13.8h, v0.h[4]
            vmxx2_s16   \i^31, 4, v7.4s, v14.8h, v1.h[0]
            vmxx2_s16   \i^31, 8, v7.4s, v15.8h, v1.h[4]
            sqshrun     v8.4h, v6.4s, #8
            sqshrun2    v8.8h, v7.4s, #8
            br          x5

/* ---- integer, column 1: result in v9, continue at x6 ---- */
.align 6
colormatrix_int_col1_\i:
      .if \i & 16
            dup         v6.4s, v4.s[1]
            dup         v7.4s, v4.s[1]
      .endif
            vmxx_s16    \i, 1, v6.4s, v12.4h, v0.h[1]
            vmxx_s16    \i, 2, v6.4s, v13.4h, v0.h[5]
            vmxx_s16    \i, 4, v6.4s, v14.4h, v1.h[1]
            vmxx_s16    \i, 8, v6.4s, v15.4h, v1.h[5]
            vmxx2_s16   \i, 1, v7.4s, v12.8h, v0.h[1]
            vmxx2_s16   \i, 2, v7.4s, v13.8h, v0.h[5]
            vmxx2_s16   \i, 4, v7.4s, v14.8h, v1.h[1]
            vmxx2_s16   \i, 8, v7.4s, v15.8h, v1.h[5]
            sqshrun     v9.4h, v6.4s, #8
            sqshrun2    v9.8h, v7.4s, #8
            br          x6

colormatrix_int_col1_n\i:
      .if (\i^31) & 16
            dup         v6.4s, v4.s[1]
            dup         v7.4s, v4.s[1]
      .endif
            vmxx_s16    \i^31, 1, v6.4s, v12.4h, v0.h[1]
            vmxx_s16    \i^31, 2, v6.4s, v13.4h, v0.h[5]
            vmxx_s16    \i^31, 4, v6.4s, v14.4h, v1.h[1]
            vmxx_s16    \i^31, 8, v6.4s, v15.4h, v1.h[5]
            vmxx2_s16   \i^31, 1, v7.4s, v12.8h, v0.h[1]
            vmxx2_s16   \i^31, 2, v7.4s, v13.8h, v0.h[5]
            vmxx2_s16   \i^31, 4, v7.4s, v14.8h, v1.h[1]
            vmxx2_s16   \i^31, 8, v7.4s, v15.8h, v1.h[5]
            sqshrun     v9.4h, v6.4s, #8
            sqshrun2    v9.8h, v7.4s, #8
            br          x6

/* ---- integer, column 2: result in v10, continue at x7 ---- */
.align 6
colormatrix_int_col2_\i:
      .if \i & 16
            dup         v6.4s, v4.s[2]
            dup         v7.4s, v4.s[2]
      .endif
            vmxx_s16    \i, 1, v6.4s, v12.4h, v0.h[2]
            vmxx_s16    \i, 2, v6.4s, v13.4h, v0.h[6]
            vmxx_s16    \i, 4, v6.4s, v14.4h, v1.h[2]
            vmxx_s16    \i, 8, v6.4s, v15.4h, v1.h[6]
            vmxx2_s16   \i, 1, v7.4s, v12.8h, v0.h[2]
            vmxx2_s16   \i, 2, v7.4s, v13.8h, v0.h[6]
            vmxx2_s16   \i, 4, v7.4s, v14.8h, v1.h[2]
            vmxx2_s16   \i, 8, v7.4s, v15.8h, v1.h[6]
            sqshrun     v10.4h, v6.4s, #8
            sqshrun2    v10.8h, v7.4s, #8
            br          x7

colormatrix_int_col2_n\i:
      .if (\i^31) & 16
            dup         v6.4s, v4.s[2]
            dup         v7.4s, v4.s[2]
      .endif
            vmxx_s16    \i^31, 1, v6.4s, v12.4h, v0.h[2]
            vmxx_s16    \i^31, 2, v6.4s, v13.4h, v0.h[6]
            vmxx_s16    \i^31, 4, v6.4s, v14.4h, v1.h[2]
            vmxx_s16    \i^31, 8, v6.4s, v15.4h, v1.h[6]
            vmxx2_s16   \i^31, 1, v7.4s, v12.8h, v0.h[2]
            vmxx2_s16   \i^31, 2, v7.4s, v13.8h, v0.h[6]
            vmxx2_s16   \i^31, 4, v7.4s, v14.8h, v1.h[2]
            vmxx2_s16   \i^31, 8, v7.4s, v15.8h, v1.h[6]
            sqshrun     v10.4h, v6.4s, #8
            sqshrun2    v10.8h, v7.4s, #8
            br          x7

/* ---- integer, column 3: result in v11, continue at x8 ---- */
.align 6
colormatrix_int_col3_\i:
      .if \i & 16
            dup         v6.4s, v4.s[3]
            dup         v7.4s, v4.s[3]
      .endif
            vmxx_s16    \i, 1, v6.4s, v12.4h, v0.h[3]
            vmxx_s16    \i, 2, v6.4s, v13.4h, v0.h[7]
            vmxx_s16    \i, 4, v6.4s, v14.4h, v1.h[3]
            vmxx_s16    \i, 8, v6.4s, v15.4h, v1.h[7]
            vmxx2_s16   \i, 1, v7.4s, v12.8h, v0.h[3]
            vmxx2_s16   \i, 2, v7.4s, v13.8h, v0.h[7]
            vmxx2_s16   \i, 4, v7.4s, v14.8h, v1.h[3]
            vmxx2_s16   \i, 8, v7.4s, v15.8h, v1.h[7]
            sqshrun     v11.4h, v6.4s, #8
            sqshrun2    v11.8h, v7.4s, #8
            br          x8

colormatrix_int_col3_n\i:
      .if (\i^31) & 16
            dup         v6.4s, v4.s[3]
            dup         v7.4s, v4.s[3]
      .endif
            vmxx_s16    \i^31, 1, v6.4s, v12.4h, v0.h[3]
            vmxx_s16    \i^31, 2, v6.4s, v13.4h, v0.h[7]
            vmxx_s16    \i^31, 4, v6.4s, v14.4h, v1.h[3]
            vmxx_s16    \i^31, 8, v6.4s, v15.4h, v1.h[7]
            vmxx2_s16   \i^31, 1, v7.4s, v12.8h, v0.h[3]
            vmxx2_s16   \i^31, 2, v7.4s, v13.8h, v0.h[7]
            vmxx2_s16   \i^31, 4, v7.4s, v14.8h, v1.h[3]
            vmxx2_s16   \i^31, 8, v7.4s, v15.8h, v1.h[7]
            sqshrun     v11.4h, v6.4s, #8
            sqshrun2    v11.8h, v7.4s, #8
            br          x8

/* ---- float, column 0: results in v8 and v16, continue at x5 ---- */
.align 5
colormatrix_float_col0_\i:
            vmxx_f32    \i, 1,  v8.4s, v12.4s, v0.s[0]
            vmxx_f32    \i, 2,  v8.4s, v13.4s, v1.s[0]
            vmxx_f32    \i, 4,  v8.4s, v14.4s, v2.s[0]
            vmxx_f32    \i, 8,  v8.4s, v15.4s, v3.s[0]
            vadd_f32    \i, 16, v8.4s, v8.4s, v4.4s,        v8.16b, v4.16b
            vmxx_f32    \i, 1,  v16.4s, v20.4s, v0.s[0]
            vmxx_f32    \i, 2,  v16.4s, v21.4s, v1.s[0]
            vmxx_f32    \i, 4,  v16.4s, v22.4s, v2.s[0]
            vmxx_f32    \i, 8,  v16.4s, v23.4s, v3.s[0]
            vadd_f32    \i, 16, v16.4s, v16.4s, v4.4s,      v16.16b, v4.16b
            br          x5

.align 4
colormatrix_float_col0_n\i:
            vmxx_f32    \i^31, 1,  v8.4s, v12.4s, v0.s[0]
            vmxx_f32    \i^31, 2,  v8.4s, v13.4s, v1.s[0]
            vmxx_f32    \i^31, 4,  v8.4s, v14.4s, v2.s[0]
            vmxx_f32    \i^31, 8,  v8.4s, v15.4s, v3.s[0]
            vadd_f32    \i^31, 16, v8.4s, v8.4s, v4.4s,     v8.16b, v4.16b
            vmxx_f32    \i^31, 1,  v16.4s, v20.4s, v0.s[0]
            vmxx_f32    \i^31, 2,  v16.4s, v21.4s, v1.s[0]
            vmxx_f32    \i^31, 4,  v16.4s, v22.4s, v2.s[0]
            vmxx_f32    \i^31, 8,  v16.4s, v23.4s, v3.s[0]
            vadd_f32    \i^31, 16, v16.4s, v16.4s, v4.4s,   v16.16b, v4.16b
            br          x5

/* ---- float, column 1: results in v9 and v17, continue at x6 ---- */
.align 5
colormatrix_float_col1_\i:
            vmxx_f32    \i, 1,  v9.4s, v12.4s, v0.s[1]
            vmxx_f32    \i, 2,  v9.4s, v13.4s, v1.s[1]
            vmxx_f32    \i, 4,  v9.4s, v14.4s, v2.s[1]
            vmxx_f32    \i, 8,  v9.4s, v15.4s, v3.s[1]
            vadd_f32    \i, 16, v9.4s, v9.4s, v5.4s,        v9.16b, v5.16b
            vmxx_f32    \i, 1,  v17.4s, v20.4s, v0.s[1]
            vmxx_f32    \i, 2,  v17.4s, v21.4s, v1.s[1]
            vmxx_f32    \i, 4,  v17.4s, v22.4s, v2.s[1]
            vmxx_f32    \i, 8,  v17.4s, v23.4s, v3.s[1]
            vadd_f32    \i, 16, v17.4s, v17.4s, v5.4s,      v17.16b, v5.16b
            br          x6

.align 4
colormatrix_float_col1_n\i:
            vmxx_f32    \i^31, 1,  v9.4s, v12.4s, v0.s[1]
            vmxx_f32    \i^31, 2,  v9.4s, v13.4s, v1.s[1]
            vmxx_f32    \i^31, 4,  v9.4s, v14.4s, v2.s[1]
            vmxx_f32    \i^31, 8,  v9.4s, v15.4s, v3.s[1]
            vadd_f32    \i^31, 16, v9.4s, v9.4s, v5.4s,     v9.16b, v5.16b
            vmxx_f32    \i^31, 1,  v17.4s, v20.4s, v0.s[1]
            vmxx_f32    \i^31, 2,  v17.4s, v21.4s, v1.s[1]
            vmxx_f32    \i^31, 4,  v17.4s, v22.4s, v2.s[1]
            vmxx_f32    \i^31, 8,  v17.4s, v23.4s, v3.s[1]
            vadd_f32    \i^31, 16, v17.4s, v17.4s, v5.4s,   v17.16b, v5.16b
            br          x6

/* ---- float, column 2: results in v10 and v18, continue at x7 ---- */
.align 5
colormatrix_float_col2_\i:
            vmxx_f32    \i, 1,  v10.4s, v12.4s, v0.s[2]
            vmxx_f32    \i, 2,  v10.4s, v13.4s, v1.s[2]
            vmxx_f32    \i, 4,  v10.4s, v14.4s, v2.s[2]
            vmxx_f32    \i, 8,  v10.4s, v15.4s, v3.s[2]
            vadd_f32    \i, 16, v10.4s, v10.4s, v6.4s,      v10.16b, v6.16b
            vmxx_f32    \i, 1,  v18.4s, v20.4s, v0.s[2]
            vmxx_f32    \i, 2,  v18.4s, v21.4s, v1.s[2]
            vmxx_f32    \i, 4,  v18.4s, v22.4s, v2.s[2]
            vmxx_f32    \i, 8,  v18.4s, v23.4s, v3.s[2]
            vadd_f32    \i, 16, v18.4s, v18.4s, v6.4s,      v18.16b, v6.16b
            br          x7

.align 4
colormatrix_float_col2_n\i:
            vmxx_f32    \i^31, 1,  v10.4s, v12.4s, v0.s[2]
            vmxx_f32    \i^31, 2,  v10.4s, v13.4s, v1.s[2]
            vmxx_f32    \i^31, 4,  v10.4s, v14.4s, v2.s[2]
            vmxx_f32    \i^31, 8,  v10.4s, v15.4s, v3.s[2]
            vadd_f32    \i^31, 16, v10.4s, v10.4s, v6.4s,   v10.16b, v6.16b
            vmxx_f32    \i^31, 1,  v18.4s, v20.4s, v0.s[2]
            vmxx_f32    \i^31, 2,  v18.4s, v21.4s, v1.s[2]
            vmxx_f32    \i^31, 4,  v18.4s, v22.4s, v2.s[2]
            vmxx_f32    \i^31, 8,  v18.4s, v23.4s, v3.s[2]
            vadd_f32    \i^31, 16, v18.4s, v18.4s, v6.4s,   v18.16b, v6.16b
            br          x7

/* ---- float, column 3: results in v11 and v19, continue at x8 ---- */
.align 5
colormatrix_float_col3_\i:
            vmxx_f32    \i, 1,  v11.4s, v12.4s, v0.s[3]
            vmxx_f32    \i, 2,  v11.4s, v13.4s, v1.s[3]
            vmxx_f32    \i, 4,  v11.4s, v14.4s, v2.s[3]
            vmxx_f32    \i, 8,  v11.4s, v15.4s, v3.s[3]
            vadd_f32    \i, 16, v11.4s, v11.4s, v7.4s,      v11.16b, v7.16b
            vmxx_f32    \i, 1,  v19.4s, v20.4s, v0.s[3]
            vmxx_f32    \i, 2,  v19.4s, v21.4s, v1.s[3]
            vmxx_f32    \i, 4,  v19.4s, v22.4s, v2.s[3]
            vmxx_f32    \i, 8,  v19.4s, v23.4s, v3.s[3]
            vadd_f32    \i, 16, v19.4s, v19.4s, v7.4s,      v19.16b, v7.16b
            br          x8

.align 4
colormatrix_float_col3_n\i:
            vmxx_f32    \i^31, 1,  v11.4s, v12.4s, v0.s[3]
            vmxx_f32    \i^31, 2,  v11.4s, v13.4s, v1.s[3]
            vmxx_f32    \i^31, 4,  v11.4s, v14.4s, v2.s[3]
            vmxx_f32    \i^31, 8,  v11.4s, v15.4s, v3.s[3]
            vadd_f32    \i^31, 16, v11.4s, v11.4s, v7.4s,  v11.16b, v7.16b
            vmxx_f32    \i^31, 1,  v19.4s, v20.4s, v0.s[3]
            vmxx_f32    \i^31, 2,  v19.4s, v21.4s, v1.s[3]
            vmxx_f32    \i^31, 4,  v19.4s, v22.4s, v2.s[3]
            vmxx_f32    \i^31, 8,  v19.4s, v23.4s, v3.s[3]
            vadd_f32    \i^31, 16, v19.4s, v19.4s, v7.4s,  v19.16b, v7.16b
            br          x8

.endr

/* colormatrix_float_ldu4
 *
 * Load stage for the float kernels: reads 32 bytes from [x1] as eight
 * 4-byte elements, de-interleaved by ld4 into four component vectors, and
 * post-increments x1 by 32.  Each component is zero-extended from 8 to 32
 * bits and converted to f32 with ucvtf.
 *
 *   out  v12-v15   components of elements 0-3, 4 x f32 each
 *        v20-v23   components of elements 4-7, 4 x f32 each
 *
 * Ends by branching to the routine whose address is in x4.
 */
.align 6
colormatrix_float_ldu4:
            ld4         {v20.8b,v21.8b,v22.8b,v23.8b}, [x1], #32
            /* 8-bit -> 16-bit, in place */
            uxtl        v20.8h, v20.8b
            uxtl        v21.8h, v21.8b
            uxtl        v22.8h, v22.8b
            uxtl        v23.8h, v23.8b
            /* 16-bit -> 32-bit: lower halves go to v12-v15 first ... */
            uxtl        v12.4s, v20.4h
            uxtl        v13.4s, v21.4h
            uxtl        v14.4s, v22.4h
            uxtl        v15.4s, v23.4h
            /* ... then the upper halves overwrite v20-v23 in place */
            uxtl2       v20.4s, v20.8h
            uxtl2       v21.4s, v21.8h
            uxtl2       v22.4s, v22.8h
            uxtl2       v23.4s, v23.8h
            /* unsigned integer -> f32 */
            ucvtf       v12.4s, v12.4s
            ucvtf       v13.4s, v13.4s
            ucvtf       v14.4s, v14.4s
            ucvtf       v15.4s, v15.4s
            ucvtf       v20.4s, v20.4s
            ucvtf       v21.4s, v21.4s
            ucvtf       v22.4s, v22.4s
            ucvtf       v23.4s, v23.4s
            br          x4

.align 5
/* colormatrix_int_ldu4: load eight 4-channel u8 elements from [x1]
 * (x1 += 32), de-interleaved into v12..v15, and zero-extend every lane
 * to 16 bits in place.
 * Continues at the address held in x4.
 */
colormatrix_int_ldu4:
            ld4         {v12.8b,v13.8b,v14.8b,v15.8b}, [x1], #32
            uxtl        v12.8h, v12.8b
            uxtl        v13.8h, v13.8b
            uxtl        v14.8h, v14.8b
            uxtl        v15.8h, v15.8b
            br          x4
3610462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 6
/* colormatrix_float_ldu3: as colormatrix_float_ldu4, but only the first
 * three channels are converted.
 *   The source is still read as eight 4-byte elements (x1 += 32); the fourth
 *   channel lands in v23 as raw bytes and is neither widened nor converted,
 *   and v15 is not written.
 *   Out: v12..v14 = channels 0-2 of elements 0-3 (float),
 *        v20..v22 = channels 0-2 of elements 4-7 (float).
 * Continues at the address held in x4.
 */
colormatrix_float_ldu3:
            ld4         {v20.8b,v21.8b,v22.8b,v23.8b}, [x1], #32
            uxtl        v20.8h, v20.8b
            uxtl        v21.8h, v21.8b
            uxtl        v22.8h, v22.8b
            uxtl        v12.4s, v20.4h
            uxtl        v13.4s, v21.4h
            uxtl        v14.4s, v22.4h
            uxtl2       v20.4s, v20.8h
            uxtl2       v21.4s, v21.8h
            uxtl2       v22.4s, v22.8h
            ucvtf       v12.4s, v12.4s
            ucvtf       v13.4s, v13.4s
            ucvtf       v14.4s, v14.4s
            ucvtf       v20.4s, v20.4s
            ucvtf       v21.4s, v21.4s
            ucvtf       v22.4s, v22.4s
            br          x4
3810462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* colormatrix_int_ldu3: load eight 4-byte elements from [x1] (x1 += 32)
 * and zero-extend channels 0-2 (v12..v14) to 16 bits.  The fourth channel
 * is loaded into v15 but left as raw bytes.
 * Continues at the address held in x4.
 */
colormatrix_int_ldu3:
            ld4         {v12.8b,v13.8b,v14.8b,v15.8b}, [x1], #32
            uxtl        v12.8h, v12.8b
            uxtl        v13.8h, v13.8b
            uxtl        v14.8h, v14.8b
            br          x4
3880462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 5
/* colormatrix_float_ldu1: load eight single-channel u8 elements from [x1]
 * (x1 += 8) and convert them to float.
 *   Out: v12 = elements 0-3, v20 = elements 4-7.
 * Continues at the address held in x4.
 */
colormatrix_float_ldu1:
            ld1         {v20.8b}, [x1], #8
            uxtl        v20.8h, v20.8b
            uxtl        v12.4s, v20.4h
            uxtl2       v20.4s, v20.8h
            ucvtf       v12.4s, v12.4s
            ucvtf       v20.4s, v20.4s
            br          x4
3980462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 6
/* colormatrix_float_ldu2: load eight 2-channel u8 elements from [x1]
 * (x1 += 16) and convert them to float.
 *   ld2 de-interleaves the channels into v20/v21, which are widened
 *   u8 -> u16 -> u32 before ucvtf.
 *   Out: v12,v13 = channels 0,1 of elements 0-3,
 *        v20,v21 = channels 0,1 of elements 4-7.
 * Continues at the address held in x4.
 */
colormatrix_float_ldu2:
            ld2         {v20.8b,v21.8b}, [x1], #16
            uxtl        v20.8h, v20.8b
            uxtl        v21.8h, v21.8b
            uxtl        v12.4s, v20.4h
            uxtl        v13.4s, v21.4h
            uxtl2       v20.4s, v20.8h
            uxtl2       v21.4s, v21.8h
            ucvtf       v12.4s, v12.4s
            ucvtf       v13.4s, v13.4s
            ucvtf       v20.4s, v20.4s
            ucvtf       v21.4s, v21.4s
            br          x4
4130462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 4
/* colormatrix_int_ldu2: load eight 2-channel u8 elements from [x1]
 * (x1 += 16), de-interleaved into v12/v13, and zero-extend every lane to
 * 16 bits in place.
 * Continues at the address held in x4.
 */
colormatrix_int_ldu2:
            ld2         {v12.8b,v13.8b}, [x1], #16
            uxtl        v12.8h, v12.8b
            uxtl        v13.8h, v13.8b
            br          x4
4200462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 6
/* colormatrix_float_stu4: convert float results to u8 and store eight
 * 4-channel elements to [x0] (x0 += 32).
 *   In:  v8..v11  = channels 0-3 of elements 0-3 (float),
 *        v16..v19 = channels 0-3 of elements 4-7 (float).
 *   fcvtzs #1 produces signed fixed point with one fraction bit; sqrshrun #1
 *   rounds that bit away while narrowing to 16 bits with unsigned
 *   saturation, and uqxtn saturates the result down to 8 bits.
 *   x2 is decremented by 8.  If the subtraction borrows (x2 was below 8)
 *   control goes to colormatrix_float_end, otherwise to the address in x9.
 * Clobbers v24..v31 and the flags.
 */
colormatrix_float_stu4:
            fcvtzs      v24.4s, v8.4s, #1
            fcvtzs      v25.4s, v9.4s, #1
            fcvtzs      v26.4s, v10.4s, #1
            fcvtzs      v27.4s, v11.4s, #1
            fcvtzs      v28.4s, v16.4s, #1
            fcvtzs      v29.4s, v17.4s, #1
            fcvtzs      v30.4s, v18.4s, #1
            fcvtzs      v31.4s, v19.4s, #1
            sqrshrun    v24.4h, v24.4s, #1
            sqrshrun    v25.4h, v25.4s, #1
            sqrshrun    v26.4h, v26.4s, #1
            sqrshrun    v27.4h, v27.4s, #1
            sqrshrun2   v24.8h, v28.4s, #1
            sqrshrun2   v25.8h, v29.4s, #1
            sqrshrun2   v26.8h, v30.4s, #1
            sqrshrun2   v27.8h, v31.4s, #1
            uqxtn       v24.8b, v24.8h
            uqxtn       v25.8b, v25.8h
            uqxtn       v26.8b, v26.8h
            uqxtn       v27.8b, v27.8h
            subs        x2, x2, #8
            st4         {v24.8b,v25.8b,v26.8b,v27.8b}, [x0], #32
            blo         colormatrix_float_end
            br          x9
4470462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 5
/* colormatrix_int_stu4: narrow the 16-bit lanes of v8..v11 to u8 with
 * unsigned saturation (into v12..v15) and store them interleaved as eight
 * 4-channel elements to [x0] (x0 += 32).
 *   x2 is decremented by 8.  If the subtraction borrows (x2 was below 8)
 *   control goes to colormatrix_int_end, otherwise to the address in x9.
 * Clobbers v12..v15 and the flags.
 */
colormatrix_int_stu4:
            uqxtn       v12.8b, v8.8h
            uqxtn       v13.8b, v9.8h
            uqxtn       v14.8b, v10.8h
            uqxtn       v15.8b, v11.8h
            subs        x2, x2, #8
            st4         {v12.8b,v13.8b,v14.8b,v15.8b}, [x0], #32
            blo         colormatrix_int_end
            br          x9
4580462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 6
/* colormatrix_float_stu3: convert three float channels to u8 and store
 * eight 4-byte elements to [x0] (x0 += 32); the fourth byte of every
 * element is written as zero (v27 is cleared with movi).
 *   In:  v8..v10  = channels 0-2 of elements 0-3 (float),
 *        v16..v18 = channels 0-2 of elements 4-7 (float).
 *   Conversion is fcvtzs #1 (one fraction bit), sqrshrun #1 (round and
 *   narrow with unsigned saturation), then uqxtn (saturate to 8 bits).
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_float_end, otherwise to the address in x9.
 * Clobbers v24..v30 and the flags.
 */
colormatrix_float_stu3:
            fcvtzs      v24.4s, v8.4s, #1
            fcvtzs      v25.4s, v9.4s, #1
            fcvtzs      v26.4s, v10.4s, #1
            fcvtzs      v28.4s, v16.4s, #1
            fcvtzs      v29.4s, v17.4s, #1
            fcvtzs      v30.4s, v18.4s, #1
            sqrshrun    v24.4h, v24.4s, #1
            sqrshrun    v25.4h, v25.4s, #1
            sqrshrun    v26.4h, v26.4s, #1
            sqrshrun2   v24.8h, v28.4s, #1
            sqrshrun2   v25.8h, v29.4s, #1
            sqrshrun2   v26.8h, v30.4s, #1
            uqxtn       v24.8b, v24.8h
            uqxtn       v25.8b, v25.8h
            uqxtn       v26.8b, v26.8h
            movi        v27.8b, #0
            subs        x2, x2, #8
            st4         {v24.8b,v25.8b,v26.8b,v27.8b}, [x0], #32
            blo         colormatrix_float_end
            br          x9
4810462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 4
/* colormatrix_int_ldu1: load eight single-channel u8 elements from [x1]
 * (x1 += 8) into v12 and zero-extend them to 16 bits in place.
 * Continues at the address held in x4.
 */
colormatrix_int_ldu1:
            ld1         {v12.8b}, [x1], #8
            uxtl        v12.8h, v12.8b
            br          x4
4870462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 5
/* colormatrix_int_stu3: narrow the 16-bit lanes of v8..v10 to u8 with
 * unsigned saturation and store eight 4-byte elements to [x0] (x0 += 32);
 * the fourth byte of every element is written as zero (v15 is cleared).
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_int_end, otherwise to the address in x9.
 * Clobbers v12..v15 and the flags.
 */
colormatrix_int_stu3:
            uqxtn       v12.8b, v8.8h
            uqxtn       v13.8b, v9.8h
            uqxtn       v14.8b, v10.8h
            movi        v15.8b, #0
            subs        x2, x2, #8
            st4         {v12.8b,v13.8b,v14.8b,v15.8b}, [x0], #32
            blo         colormatrix_int_end
            br          x9
4980462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 6
/* colormatrix_float_stu2: convert two float channels to u8 and store
 * eight 2-channel elements to [x0] (x0 += 16).
 *   In:  v8,v9   = channels 0,1 of elements 0-3 (float),
 *        v16,v17 = channels 0,1 of elements 4-7 (float).
 *   Conversion is fcvtzs #1 (one fraction bit), sqrshrun #1 (round and
 *   narrow with unsigned saturation), then uqxtn (saturate to 8 bits).
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_float_end, otherwise to the address in x9.
 * Clobbers v24, v25, v28, v29 and the flags.
 */
colormatrix_float_stu2:
            fcvtzs      v24.4s, v8.4s, #1
            fcvtzs      v25.4s, v9.4s, #1
            fcvtzs      v28.4s, v16.4s, #1
            fcvtzs      v29.4s, v17.4s, #1
            sqrshrun    v24.4h, v24.4s, #1
            sqrshrun    v25.4h, v25.4s, #1
            sqrshrun2   v24.8h, v28.4s, #1
            sqrshrun2   v25.8h, v29.4s, #1
            uqxtn       v24.8b, v24.8h
            uqxtn       v25.8b, v25.8h
            subs        x2, x2, #8
            st2         {v24.8b,v25.8b}, [x0], #16
            blo         colormatrix_float_end
            br          x9
5150462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 5
/* colormatrix_int_stu2: narrow the 16-bit lanes of v8,v9 to u8 with
 * unsigned saturation and store them interleaved as eight 2-channel
 * elements to [x0] (x0 += 16).
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_int_end, otherwise to the address in x9.
 * Clobbers v12, v13 and the flags.
 */
colormatrix_int_stu2:
            uqxtn       v12.8b, v8.8h
            uqxtn       v13.8b, v9.8h
            subs        x2, x2, #8
            st2         {v12.8b,v13.8b}, [x0], #16
            blo         colormatrix_int_end
            br          x9
5240462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 5
/* colormatrix_int_stu1: narrow the 16-bit lanes of v8 to u8 with unsigned
 * saturation and store eight single-channel elements to [x0] (x0 += 8).
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_int_end, otherwise to the address in x9.
 * Clobbers v12 and the flags.
 */
colormatrix_int_stu1:
            uqxtn       v12.8b, v8.8h
            subs        x2, x2, #8
            st1         {v12.8b}, [x0], #8
            blo         colormatrix_int_end
            br          x9
5320462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* colormatrix_float_ldf3: load eight 4-float elements from [x1]
 * (x1 += 128 in total), de-interleaved by channel.
 *   Out: v12..v15 = channels 0-3 of elements 0-3,
 *        v20..v23 = channels 0-3 of elements 4-7.
 *   The instruction sequence is identical to colormatrix_float_ldf4, so the
 *   source stride is four floats per element and the fourth channel is
 *   loaded as well.
 * Continues at the address held in x4.
 */
colormatrix_float_ldf3:
            ld4         {v12.4s,v13.4s,v14.4s,v15.4s}, [x1], #64
            ld4         {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], #64
            br          x4
5370462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 6
/* colormatrix_float_stu1: convert one float channel to u8 and store eight
 * single-channel elements to [x0] (x0 += 8).
 *   In:  v8 = elements 0-3, v16 = elements 4-7 (float).
 *   Conversion is fcvtzs #1 (one fraction bit), sqrshrun #1 (round and
 *   narrow with unsigned saturation), then uqxtn (saturate to 8 bits).
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_float_end, otherwise to the address in x9.
 * Clobbers v24, v28 and the flags.
 */
colormatrix_float_stu1:
            fcvtzs      v24.4s, v8.4s, #1
            fcvtzs      v28.4s, v16.4s, #1
            sqrshrun    v24.4h, v24.4s, #1
            sqrshrun2   v24.8h, v28.4s, #1
            uqxtn       v24.8b, v24.8h
            subs        x2, x2, #8
            st1         {v24.8b}, [x0], #8
            blo         colormatrix_float_end
            br          x9
5490462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* colormatrix_float_stf3: store eight 4-float elements to [x0]
 * (x0 += 128 in total) with the fourth channel forced to zero.
 *   In:  v8..v10  = channels 0-2 of elements 0-3,
 *        v16..v18 = channels 0-2 of elements 4-7.
 *   v11 and v19 are overwritten with zero to supply the fourth channel.
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_float_end, otherwise to the address in x9.
 * Clobbers v11, v19 and the flags.
 */
colormatrix_float_stf3:
            movi        v11.16b, #0
            st4         {v8.4s,v9.4s,v10.4s,v11.4s}, [x0], #64
            movi        v19.16b, #0
            subs        x2, x2, #8
            st4         {v16.4s,v17.4s,v18.4s,v19.4s}, [x0], #64
            blo         colormatrix_float_end
            br          x9
5580462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 5
/* colormatrix_float_stf4: store eight 4-float elements to [x0]
 * (x0 += 128 in total), interleaving the channels.
 *   In:  v8..v11  = channels 0-3 of elements 0-3,
 *        v16..v19 = channels 0-3 of elements 4-7.
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_float_end, otherwise to the address in x9.
 * Clobbers the flags.
 */
colormatrix_float_stf4:
            st4         {v8.4s,v9.4s,v10.4s,v11.4s}, [x0], #64
            subs        x2, x2, #8
            st4         {v16.4s,v17.4s,v18.4s,v19.4s}, [x0], #64
            blo         colormatrix_float_end
            br          x9
5660462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* colormatrix_float_ldf4: load eight 4-float elements from [x1]
 * (x1 += 128 in total), de-interleaved by channel.
 *   Out: v12..v15 = channels 0-3 of elements 0-3,
 *        v20..v23 = channels 0-3 of elements 4-7.
 * Continues at the address held in x4.
 */
colormatrix_float_ldf4:
            ld4         {v12.4s,v13.4s,v14.4s,v15.4s}, [x1], #64
            ld4         {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], #64
            br          x4
5710462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 5
/* colormatrix_float_stf2: store eight 2-float elements to [x0]
 * (x0 += 64 in total), interleaving the channels.
 *   In:  v8,v9   = channels 0,1 of elements 0-3,
 *        v16,v17 = channels 0,1 of elements 4-7.
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_float_end, otherwise to the address in x9.
 * Clobbers the flags.
 */
colormatrix_float_stf2:
            st2         {v8.4s, v9.4s}, [x0], #32
            subs        x2, x2, #8
            st2         {v16.4s, v17.4s}, [x0], #32
            blo         colormatrix_float_end
            br          x9
5790462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* colormatrix_float_ldf2: load eight 2-float elements from [x1]
 * (x1 += 64 in total), de-interleaved by channel.
 *   Out: v12,v13 = channels 0,1 of elements 0-3,
 *        v20,v21 = channels 0,1 of elements 4-7.
 * Continues at the address held in x4.
 */
colormatrix_float_ldf2:
            ld2         {v12.4s,v13.4s}, [x1], #32
            ld2         {v20.4s,v21.4s}, [x1], #32
            br          x4
5840462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
.align 5
/* colormatrix_float_stf1: store eight single floats to [x0]
 * (x0 += 32 in total).
 *   In:  v8 = elements 0-3, v16 = elements 4-7.
 *   x2 is decremented by 8.  If the subtraction borrows control goes to
 *   colormatrix_float_end, otherwise to the address in x9.
 * Clobbers the flags.
 */
colormatrix_float_stf1:
            st1         {v8.4s}, [x0], #16
            subs        x2, x2, #8
            st1         {v16.4s}, [x0], #16
            blo         colormatrix_float_end
            br          x9
5920462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* colormatrix_float_ldf1: load eight single floats from [x1]
 * (x1 += 32 in total).
 *   Out: v12 = elements 0-3, v20 = elements 4-7.
 * Continues at the address held in x4.
 */
colormatrix_float_ldf1:
            ld1         {v12.4s}, [x1], #16
            ld1         {v20.4s}, [x1], #16
            br          x4
5970462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* colormatrix_int_stu1_end: partial store of fewer than eight
 * single-channel u8 elements.
 *   v8 is narrowed to u8 with unsigned saturation into v12, then the low
 *   three bits of x2 select what is written to [x0]:
 *     bit 2 -> bytes 4-7 (x0 += 4)
 *     bit 1 -> bytes 2-3 (x0 += 2)
 *     bit 0 -> byte 1    (x0 += 1)
 *   Byte 0 is never stored; this mirrors the lane layout produced by
 *   colormatrix_int_ldu1_end.
 * Finishes by branching to colormatrix_int_realend.  Clobbers v12.
 */
colormatrix_int_stu1_end:
            uqxtn       v12.8b, v8.8h
            tbz         x2, #2, 1f
            st1         {v12.s}[1], [x0], #4
1:          tbz         x2, #1, 1f
            st1         {v12.h}[1], [x0], #2
1:          tbz         x2, #0, 1f
            st1         {v12.b}[1], [x0], #1
1:          b           colormatrix_int_realend
6076e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* colormatrix_int_stu2_end: partial store of fewer than eight 2-channel
 * u8 elements.
 *   v8/v9 are narrowed to u8 with unsigned saturation and re-interleaved
 *   with zip1, so v12 holds elements 0-7 as consecutive byte pairs.  The low
 *   three bits of x2 then select what is written to [x0]:
 *     bit 2 -> elements 4-7 (8 bytes, x0 += 8)
 *     bit 1 -> elements 2-3 (4 bytes, x0 += 4)
 *     bit 0 -> element 1    (2 bytes, x0 += 2)
 *   Element 0 is never stored; this mirrors the lane layout produced by
 *   colormatrix_int_ldu2_end.
 * Finishes by branching to colormatrix_int_realend.  Clobbers v12, v13.
 */
colormatrix_int_stu2_end:
            uqxtn       v12.8b, v8.8h
            uqxtn       v13.8b, v9.8h
            zip1        v12.16b, v12.16b, v13.16b
            tbz         x2, #2, 1f
            st1         {v12.d}[1], [x0], #8
1:          tbz         x2, #1, 1f
            st1         {v12.s}[1], [x0], #4
1:          tbz         x2, #0, 1f
            st1         {v12.h}[1], [x0], #2
1:          b           colormatrix_int_realend
6196e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* colormatrix_int_stu3_end: partial store of fewer than eight 4-byte
 * elements whose fourth byte is zero.
 *   v8..v10 are narrowed to u8 with unsigned saturation into v12..v14 and
 *   v15 is cleared.  Each st4 writes one interleaved 4-byte element
 *   (x0 += 4).  The low three bits of x2 select the lanes:
 *     bit 2 -> lanes 4-7
 *     bit 1 -> lanes 2-3
 *     bit 0 -> lane 1
 *   Lane 0 is never stored; this mirrors the lane layout produced by
 *   colormatrix_int_ldu3_end.
 * Finishes by branching to colormatrix_int_realend.  Clobbers v12..v15.
 */
colormatrix_int_stu3_end:
            uqxtn       v12.8b, v8.8h
            uqxtn       v13.8b, v9.8h
            uqxtn       v14.8b, v10.8h
            movi        v15.8b, #0
            tbz         x2, #2, 1f
            st4         {v12.b,v13.b,v14.b,v15.b}[4], [x0], #4
            st4         {v12.b,v13.b,v14.b,v15.b}[5], [x0], #4
            st4         {v12.b,v13.b,v14.b,v15.b}[6], [x0], #4
            st4         {v12.b,v13.b,v14.b,v15.b}[7], [x0], #4
1:          tbz         x2, #1, 1f
            st4         {v12.b,v13.b,v14.b,v15.b}[2], [x0], #4
            st4         {v12.b,v13.b,v14.b,v15.b}[3], [x0], #4
1:          tbz         x2, #0, 1f
            st4         {v12.b,v13.b,v14.b,v15.b}[1], [x0], #4
1:          b           colormatrix_int_realend
6366e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* colormatrix_int_stu4_end: partial store of fewer than eight 4-channel
 * u8 elements.
 *   v8..v11 are narrowed to u8 with unsigned saturation into v12..v15.
 *   Each st4 writes one interleaved 4-byte element (x0 += 4).  The low three
 *   bits of x2 select the lanes:
 *     bit 2 -> lanes 4-7
 *     bit 1 -> lanes 2-3
 *     bit 0 -> lane 1
 *   Lane 0 is never stored; this mirrors the lane layout produced by
 *   colormatrix_int_ldu4_end.
 * Finishes by branching to colormatrix_int_realend.  Clobbers v12..v15.
 */
colormatrix_int_stu4_end:
            uqxtn       v12.8b, v8.8h
            uqxtn       v13.8b, v9.8h
            uqxtn       v14.8b, v10.8h
            uqxtn       v15.8b, v11.8h
            tbz         x2, #2, 1f
            st4         {v12.b,v13.b,v14.b,v15.b}[4], [x0], #4
            st4         {v12.b,v13.b,v14.b,v15.b}[5], [x0], #4
            st4         {v12.b,v13.b,v14.b,v15.b}[6], [x0], #4
            st4         {v12.b,v13.b,v14.b,v15.b}[7], [x0], #4
1:          tbz         x2, #1, 1f
            st4         {v12.b,v13.b,v14.b,v15.b}[2], [x0], #4
            st4         {v12.b,v13.b,v14.b,v15.b}[3], [x0], #4
1:          tbz         x2, #0, 1f
            st4         {v12.b,v13.b,v14.b,v15.b}[1], [x0], #4
1:          b           colormatrix_int_realend
6536e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
6546e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* colormatrix_int_ldu1_end: partial load of fewer than eight
 * single-channel u8 elements.
 *   The low three bits of x2 select what is read from [x1] into the upper
 *   half of v15:
 *     bit 2 -> bytes 12-15 (x1 += 4)
 *     bit 1 -> bytes 10-11 (x1 += 2)
 *     bit 0 -> byte 9      (x1 += 1)
 *   uxtl2 then zero-extends bytes 8-15 of v15 into the eight 16-bit lanes
 *   of v12, so the loaded data sits in lanes 4-7, 2-3 and 1 respectively.
 *   Lanes whose bytes were not loaded carry whatever v15 held before.
 * Continues at the address held in x4.
 */
colormatrix_int_ldu1_end:
            tbz         x2, #2, 1f
            ld1         {v15.s}[3], [x1], #4
1:          tbz         x2, #1, 1f
            ld1         {v15.h}[5], [x1], #2
1:          tbz         x2, #0, 1f
            ld1         {v15.b}[9], [x1], #1
1:          uxtl2       v12.8h, v15.16b
            br          x4
6646e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* colormatrix_int_ldu2_end: partial load of fewer than eight 2-channel
 * u8 elements.
 *   The low three bits of x2 select what is read from [x1] into v15, viewed
 *   as eight consecutive byte pairs:
 *     bit 2 -> elements 4-7 (8 bytes, x1 += 8)
 *     bit 1 -> elements 2-3 (4 bytes, x1 += 4)
 *     bit 0 -> element 1    (2 bytes, x1 += 2)
 *   uzp1/uzp2 then split the even and odd bytes (channel 0 and channel 1),
 *   and uxtl zero-extends them to 16 bits in v12 and v13.
 *   Lanes that were not loaded carry whatever v15 held before.
 * Continues at the address held in x4.  Clobbers v14, v15.
 */
colormatrix_int_ldu2_end:
            tbz         x2, #2, 1f
            ld1         {v15.d}[1], [x1], #8
1:          tbz         x2, #1, 1f
            ld1         {v15.s}[1], [x1], #4
1:          tbz         x2, #0, 1f
            ld1         {v15.h}[1], [x1], #2
1:          uzp1        v14.16b, v15.16b, v15.16b
            uzp2        v15.16b, v15.16b, v15.16b
            uxtl        v12.8h, v14.8b
            uxtl        v13.8h, v15.8b
            br          x4
6776e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* colormatrix_int_ldu3_end: partial load of fewer than eight 4-byte
 * elements, of which only the first three channels are widened.
 *   Each ld4 reads one 4-byte element from [x1] (x1 += 4) into a single byte
 *   lane of v12..v15.  The low three bits of x2 select the lanes:
 *     bit 2 -> lanes 4-7
 *     bit 1 -> lanes 2-3
 *     bit 0 -> lane 1
 *   v12..v14 are then zero-extended to 16 bits; v15 is left as raw bytes.
 *   Lanes that were not loaded keep their previous contents.
 * Continues at the address held in x4.
 */
colormatrix_int_ldu3_end:
            tbz         x2, #2, 1f
            ld4         {v12.b,v13.b,v14.b,v15.b}[4], [x1], #4
            ld4         {v12.b,v13.b,v14.b,v15.b}[5], [x1], #4
            ld4         {v12.b,v13.b,v14.b,v15.b}[6], [x1], #4
            ld4         {v12.b,v13.b,v14.b,v15.b}[7], [x1], #4
1:          tbz         x2, #1, 1f
            ld4         {v12.b,v13.b,v14.b,v15.b}[2], [x1], #4
            ld4         {v12.b,v13.b,v14.b,v15.b}[3], [x1], #4
1:          tbz         x2, #0, 1f
            ld4         {v12.b,v13.b,v14.b,v15.b}[1], [x1], #4
1:          uxtl        v12.8h, v12.8b
            uxtl        v13.8h, v13.8b
            uxtl        v14.8h, v14.8b
            br          x4
6936e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* colormatrix_int_ldu4_end: partial load of fewer than eight 4-channel
 * u8 elements.
 *   Each ld4 reads one 4-byte element from [x1] (x1 += 4) into a single byte
 *   lane of v12..v15.  The low three bits of x2 select the lanes:
 *     bit 2 -> lanes 4-7
 *     bit 1 -> lanes 2-3
 *     bit 0 -> lane 1
 *   All four registers are then zero-extended to 16 bits in place.
 *   Lanes that were not loaded keep their previous contents.
 * Continues at the address held in x4.
 */
colormatrix_int_ldu4_end:
            tbz         x2, #2, 1f
            ld4         {v12.b,v13.b,v14.b,v15.b}[4], [x1], #4
            ld4         {v12.b,v13.b,v14.b,v15.b}[5], [x1], #4
            ld4         {v12.b,v13.b,v14.b,v15.b}[6], [x1], #4
            ld4         {v12.b,v13.b,v14.b,v15.b}[7], [x1], #4
1:          tbz         x2, #1, 1f
            ld4         {v12.b,v13.b,v14.b,v15.b}[2], [x1], #4
            ld4         {v12.b,v13.b,v14.b,v15.b}[3], [x1], #4
1:          tbz         x2, #0, 1f
            ld4         {v12.b,v13.b,v14.b,v15.b}[1], [x1], #4
1:          uxtl        v12.8h, v12.8b
            uxtl        v13.8h, v13.8b
            uxtl        v14.8h, v14.8b
            uxtl        v15.8h, v15.8b
            br          x4
7106e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail store for the float path, one unsigned byte per element.
 * v8 supplies result bytes 0-3 and v16 supplies result bytes 4-7.  Each
 * float is truncated to fixed point with one fractional bit, that bit is
 * rounded away while narrowing to unsigned 16-bit with saturation, and
 * the value is finally saturated to 8 bits in v12.
 * The low three bits of x2 select what is written to x0: bit 2 stores
 * bytes 4-7, bit 1 stores bytes 2-3 and bit 0 stores byte 1.  Byte 0 is
 * never stored here.
 */
7116e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_stu1_end:
7126e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v12.4s, v8.4s, #1
7136e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v13.4s, v16.4s, #1
7146e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v12.4h, v12.4s, #1
7156e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun2   v12.8h, v13.4s, #1
7166e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v12.8b, v12.8h
7176e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
7186e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v12.s}[1], [x0], #4
7196e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
7206e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v12.h}[1], [x0], #2
7216e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
7226e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v12.b}[1], [x0], #1
7236e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          b           colormatrix_float_realend
7246e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail store for the float path, two unsigned bytes per element.
 * v8/v9 hold the two channels of elements 0-3 and v16/v17 the same
 * channels of elements 4-7.  After the rounded, saturating conversion to
 * 16-bit lanes the two channels are interleaved with zip1 and narrowed to
 * bytes, leaving sixteen packed bytes in v12 (two per element).
 * The low three bits of x2 select what is written to x0: bit 2 stores the
 * upper eight bytes, bit 1 stores bytes 4-7 and bit 0 stores bytes 2-3.
 * The first two bytes of v12 are never stored here.
 */
7256e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_stu2_end:
7266e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v12.4s, v8.4s, #1
7276e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v13.4s, v9.4s, #1
7286e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v14.4s, v16.4s, #1
7296e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v15.4s, v17.4s, #1
7306e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v12.4h, v12.4s, #1
7316e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v13.4h, v13.4s, #1
7326e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v14.4h, v14.4s, #1
7336e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v15.4h, v15.4s, #1
/* Interleave channel pairs: v12 covers elements 0-3, v13 elements 4-7. */
7346e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            zip1        v12.8h, v12.8h, v13.8h
7356e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            zip1        v13.8h, v14.8h, v15.8h
7366e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v12.8b, v12.8h
7376e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn2      v12.16b, v13.8h
7386e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
7396e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v12.d}[1], [x0], #8
7406e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
7416e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v12.s}[1], [x0], #4
7426e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
7436e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v12.h}[1], [x0], #2
7446e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          b           colormatrix_float_realend
7456e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail store for the float path, three computed channels written as four
 * bytes per element.
 * v8-v10 hold the channels of elements 0-3 and v16-v18 the channels of
 * elements 4-7.  Each channel is converted with rounding and saturation
 * to bytes in v12-v14, using v24-v26 and v28-v30 as scratch.  v15 is
 * cleared so the fourth byte of every stored element is zero.
 * The low three bits of x2 select what is written to x0: bit 2 stores
 * lanes 4-7, bit 1 stores lanes 2-3 and bit 0 stores lane 1.  Lane 0 is
 * never stored here.
 */
7466e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_stu3_end:
7476e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v24.4s, v8.4s, #1
7486e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v25.4s, v9.4s, #1
7496e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v26.4s, v10.4s, #1
7506e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v28.4s, v16.4s, #1
7516e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v29.4s, v17.4s, #1
7526e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v30.4s, v18.4s, #1
7536e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v24.4h, v24.4s, #1
7546e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v25.4h, v25.4s, #1
7556e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v26.4h, v26.4s, #1
7566e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun2   v24.8h, v28.4s, #1
7576e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun2   v25.8h, v29.4s, #1
7586e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun2   v26.8h, v30.4s, #1
7596e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v12.8b, v24.8h
7606e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v13.8b, v25.8h
7616e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v14.8b, v26.8h
/* No fourth channel was computed, so its byte is written as zero. */
7626e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            movi        v15.8b, #0
7636e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
7646e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[4], [x0], #4
7656e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[5], [x0], #4
7666e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[6], [x0], #4
7676e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[7], [x0], #4
7686e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
7696e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[2], [x0], #4
7706e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[3], [x0], #4
7716e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
7726e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[1], [x0], #4
7736e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          b           colormatrix_float_realend
7746e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail store for the float path, four unsigned bytes per element.
 * v8-v11 hold the channels of elements 0-3 and v16-v19 the channels of
 * elements 4-7.  Each channel is converted with rounding and saturation
 * to bytes in v12-v15, using v24-v31 as scratch.
 * The low three bits of x2 select what is written to x0: bit 2 stores
 * lanes 4-7, bit 1 stores lanes 2-3 and bit 0 stores lane 1.  Lane 0 is
 * never stored here.
 */
7756e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_stu4_end:
7766e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v24.4s, v8.4s, #1
7776e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v25.4s, v9.4s, #1
7786e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v26.4s, v10.4s, #1
7796e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v27.4s, v11.4s, #1
7806e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v28.4s, v16.4s, #1
7816e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v29.4s, v17.4s, #1
7826e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v30.4s, v18.4s, #1
7836e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            fcvtzs      v31.4s, v19.4s, #1
7846e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v24.4h, v24.4s, #1
7856e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v25.4h, v25.4s, #1
7866e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v26.4h, v26.4s, #1
7876e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun    v27.4h, v27.4s, #1
7886e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun2   v24.8h, v28.4s, #1
7896e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun2   v25.8h, v29.4s, #1
7906e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun2   v26.8h, v30.4s, #1
7916e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            sqrshrun2   v27.8h, v31.4s, #1
7926e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v12.8b, v24.8h
7936e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v13.8b, v25.8h
7946e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v14.8b, v26.8h
7956e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uqxtn       v15.8b, v27.8h
7966e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
7976e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[4], [x0], #4
7986e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[5], [x0], #4
7996e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[6], [x0], #4
8006e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[7], [x0], #4
8016e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
8026e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[2], [x0], #4
8036e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[3], [x0], #4
8046e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
8056e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v12.b,v13.b,v14.b,v15.b}[1], [x0], #4
8066e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          b           colormatrix_float_realend
8076e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail store for the float path, one 32-bit float per element, with no
 * conversion.  The low three bits of x2 select what is written to x0:
 * bit 2 stores all four lanes of v16, bit 1 stores lanes 2-3 of v8 and
 * bit 0 stores lane 1 of v8.  Lane 0 of v8 is never stored here.
 */
8086e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_stf1_end:
8096e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
8106e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v16.4s}, [x0], #16
8116e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
8126e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v8.d}[1], [x0], #8
8136e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
8146e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st1         {v8.s}[1], [x0], #4
8156e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          b           colormatrix_float_realend
8166e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail store for the float path, two interleaved 32-bit floats per
 * element, with no conversion.  The low three bits of x2 select what is
 * written to x0: bit 2 stores all four lanes of v16/v17, bit 1 stores
 * lanes 2-3 of v8/v9 and bit 0 stores lane 1 of v8/v9.  Lane 0 of v8/v9
 * is never stored here.
 */
8176e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_stf2_end:
8186e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
8196e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st2         {v16.4s, v17.4s}, [x0], #32
8206e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
8216e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st2         {v8.s,v9.s}[2], [x0], #8
8226e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st2         {v8.s,v9.s}[3], [x0], #8
8236e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
8246e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st2         {v8.s,v9.s}[1], [x0], #8
8256e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          b           colormatrix_float_realend
8266e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail store for the float path, four interleaved 32-bit floats per
 * element, with no conversion.  The stf3 entry first clears v11 and v19
 * so the fourth float of every stored element is zero, then falls through
 * into the stf4 code, which stores v11 and v19 as they are.
 * The low three bits of x2 select what is written to x0: bit 2 stores all
 * four lanes of v16-v19, bit 1 stores lanes 2-3 of v8-v11 and bit 0
 * stores lane 1 of v8-v11.  Lane 0 of v8-v11 is never stored here.
 */
8276e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_stf3_end:
8286e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            movi        v11.16b, #0
8296e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            movi        v19.16b, #0
8306e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_stf4_end:
8316e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
8326e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v16.4s,v17.4s,v18.4s,v19.4s}, [x0], #64
8336e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
8346e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v8.s,v9.s,v10.s,v11.s}[2], [x0], #16
8356e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v8.s,v9.s,v10.s,v11.s}[3], [x0], #16
8366e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
8376e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            st4         {v8.s,v9.s,v10.s,v11.s}[1], [x0], #16
8386e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          b           colormatrix_float_realend
8396e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail load for the float path, one unsigned byte per element.
 * The low three bits of x2 select how much is read from x1 into v15:
 * bit 2 fills bytes 4-7, bit 1 fills bytes 2-3 and bit 0 fills byte 1.
 * Byte 0 is never written here.
 * The bytes are then zero-extended and converted to float: bytes 0-3 end
 * up in v12 and bytes 4-7 in v20.  Control is passed to the address held
 * in x4.
 */
8406e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_ldu1_end:
8416e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
8426e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v15.s}[1], [x1], #4
8436e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
8446e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v15.h}[1], [x1], #2
8456e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
8466e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v15.b}[1], [x1], #1
8476e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          uxtl        v15.8h, v15.8b
8486e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v12.4s, v15.4h
8496e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v20.4s, v15.8h
8506e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v12.4s, v12.4s
8516e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v20.4s, v20.4s
8526e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            br          x4
8536e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail load for the float path, two interleaved unsigned bytes per
 * element.
 * The low three bits of x2 select how much is read from x1 into v15:
 * bit 2 fills the upper eight bytes, bit 1 fills bytes 4-7 and bit 0
 * fills bytes 2-3.  The first two bytes are never written here.
 * After widening to 16 bits, uzp1/uzp2 separate the even and odd
 * positions, that is the first and second byte of each element.  The
 * results are converted to float: v12/v13 receive the two channels of
 * elements 0-3 and v20/v21 the two channels of elements 4-7.  v14 is used
 * as scratch.  Control is passed to the address held in x4.
 */
8546e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_ldu2_end:
8556e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
8566e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v15.d}[1], [x1], #8
8576e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
8586e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v15.s}[1], [x1], #4
8596e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
8606e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v15.h}[1], [x1], #2
8616e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          uxtl        v14.8h, v15.8b
8626e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v15.8h, v15.16b
/* De-interleave: even halfwords are channel 0, odd halfwords channel 1. */
8636e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uzp1        v12.8h, v14.8h, v14.8h
8646e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uzp2        v13.8h, v14.8h, v14.8h
8656e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uzp1        v20.8h, v15.8h, v15.8h
8666e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uzp2        v21.8h, v15.8h, v15.8h
8676e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v12.4s, v12.4h
8686e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v13.4s, v13.4h
8696e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v20.4s, v20.4h
8706e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v21.4s, v21.4h
8716e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v12.4s, v12.4s
8726e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v13.4s, v13.4s
8736e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v20.4s, v20.4s
8746e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v21.4s, v21.4s
8756e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            br          x4
8766e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail load for the float path, four interleaved bytes per element of
 * which only the first three are converted.
 * The low three bits of x2 select how much is read from x1: bit 2 fills
 * byte lanes 4-7, bit 1 fills lanes 2-3 and bit 0 fills lane 1 of
 * v20-v23.  Lane 0 is never written here.  The fourth byte lands in v23
 * and is not used any further in this routine.
 * v20-v22 are zero-extended and converted to float: v12-v14 receive the
 * channels of lanes 0-3 and v20-v22 the channels of lanes 4-7.  Control
 * is passed to the address held in x4.
 */
8776e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_ldu3_end:
8786e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
8796e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[4], [x1], #4
8806e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[5], [x1], #4
8816e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[6], [x1], #4
8826e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[7], [x1], #4
8836e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
8846e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[2], [x1], #4
8856e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[3], [x1], #4
8866e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
8876e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[1], [x1], #4
8886e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          uxtl        v20.8h, v20.8b
8896e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v21.8h, v21.8b
8906e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v22.8h, v22.8b
/* The low halves must be extracted before uxtl2 overwrites v20-v22. */
8916e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v12.4s, v20.4h
8926e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v13.4s, v21.4h
8936e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v14.4s, v22.4h
8946e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v20.4s, v20.8h
8956e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v21.4s, v21.8h
8966e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v22.4s, v22.8h
8976e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v12.4s, v12.4s
8986e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v13.4s, v13.4s
8996e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v14.4s, v14.4s
9006e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v20.4s, v20.4s
9016e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v21.4s, v21.4s
9026e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v22.4s, v22.4s
9036e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            br          x4
9046e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail load for the float path, four interleaved unsigned bytes per
 * element.
 * The low three bits of x2 select how much is read from x1: bit 2 fills
 * byte lanes 4-7, bit 1 fills lanes 2-3 and bit 0 fills lane 1 of
 * v20-v23.  Lane 0 is never written here.
 * All four channels are zero-extended and converted to float: v12-v15
 * receive the channels of lanes 0-3 and v20-v23 the channels of lanes
 * 4-7.  Control is passed to the address held in x4.
 */
9056e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_ldu4_end:
9066e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
9076e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[4], [x1], #4
9086e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[5], [x1], #4
9096e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[6], [x1], #4
9106e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[7], [x1], #4
9116e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
9126e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[2], [x1], #4
9136e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[3], [x1], #4
9146e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
9156e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.b,v21.b,v22.b,v23.b}[1], [x1], #4
9166e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          uxtl        v20.8h, v20.8b
9176e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v21.8h, v21.8b
9186e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v22.8h, v22.8b
9196e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v23.8h, v23.8b
/* The low halves must be extracted before uxtl2 overwrites v20-v23. */
9206e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v12.4s, v20.4h
9216e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v13.4s, v21.4h
9226e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v14.4s, v22.4h
9236e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl        v15.4s, v23.4h
9246e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v20.4s, v20.8h
9256e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v21.4s, v21.8h
9266e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v22.4s, v22.8h
9276e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            uxtl2       v23.4s, v23.8h
9286e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v12.4s, v12.4s
9296e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v13.4s, v13.4s
9306e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v14.4s, v14.4s
9316e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v15.4s, v15.4s
9326e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v20.4s, v20.4s
9336e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v21.4s, v21.4s
9346e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v22.4s, v22.4s
9356e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ucvtf       v23.4s, v23.4s
9366e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            br          x4
9370462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* Tail load for the float path, one 32-bit float per element, with no
 * conversion.  The low three bits of x2 select how much is read from x1:
 * bit 2 fills all four lanes of v20, bit 1 fills lanes 2-3 of v12 and
 * bit 0 fills lane 1 of v12.  Lane 0 of v12 is never written here.
 * Control is passed to the address held in x4.
 */
9386e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_ldf1_end:
9396e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
9406e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v20.4s}, [x1], #16
9416e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
9426e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v12.d}[1], [x1], #8
9436e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
9446e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld1         {v12.s}[1], [x1], #4
9456e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          br          x4
9466e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail load for the float path, two interleaved 32-bit floats per
 * element, with no conversion.  The low three bits of x2 select how much
 * is read from x1: bit 2 fills all four lanes of v20/v21, bit 1 fills
 * lanes 2-3 of v12/v13 and bit 0 fills lane 1 of v12/v13.  Lane 0 of
 * v12/v13 is never written here.  Control is passed to the address held
 * in x4.
 */
9476e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_ldf2_end:
9486e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
9496e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld2         {v20.4s,v21.4s}, [x1], #32
9506e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
9516e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld2         {v12.s,v13.s}[2], [x1], #8
9526e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld2         {v12.s,v13.s}[3], [x1], #8
9536e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
9546e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld2         {v12.s,v13.s}[1], [x1], #8
9556e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          br          x4
9566e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Tail load for the float path, four interleaved 32-bit floats per
 * element, with no conversion.  The ldf3 and ldf4 labels share this code,
 * so both variants read sixteen bytes per element.
 * The low three bits of x2 select how much is read from x1: bit 2 fills
 * all four lanes of v20-v23, bit 1 fills lanes 2-3 of v12-v15 and bit 0
 * fills lane 1 of v12-v15.  Lane 0 of v12-v15 is never written here.
 * Control is passed to the address held in x4.
 */
9576e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_ldf3_end:
9586e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_float_ldf4_end:
9596e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            tbz         x2, #2, 1f
9606e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], #64
9616e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #1, 1f
9626e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v12.s,v13.s,v14.s,v15.s}[2], [x1], #16
9636e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v12.s,v13.s,v14.s,v15.s}[3], [x1], #16
9646e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          tbz         x2, #0, 1f
9656e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ld4         {v12.s,v13.s,v14.s,v15.s}[1], [x1], #16
9666e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie1:          br          x4
9676e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
9686e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie/* void rsdIntrinsicColorMatrix_int_K(
9690462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie *          void *out,              // x0
9700462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie *          void const *in,         // x1
9710462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie *          size_t count,           // x2
9720462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie *          fntab_t const *fns,     // x3
9730462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie *          int16_t const *mult,    // x4
9740462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie *          int32_t const *add);    // x5
9750462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie */
9760462a39371659d1eeed5eb48dd6d507760301c22Simon HosieENTRY(rsdIntrinsicColorMatrix_int_K)
/* Reserve 64 bytes of stack and save the low 64 bits of v8-v15, which
 * this routine overwrites below.  v8-v11 go to the new sp and v12-v15 to
 * the upper half of the frame, addressed through x7.
 */
9770462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sub         x7, sp, #32
9780462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sub         sp, sp, #64
9790462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            st1         {v8.1d-v11.1d}, [sp]
9800462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            st1         {v12.1d-v15.1d}, [x7]
9810462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* Sixteen 16-bit multipliers into v0-v1 and four 32-bit addends into v4. */
9820462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            ld1         {v0.8h,v1.8h}, [x4], #32
9830462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            ld1         {v4.4s}, [x5], #16
9840462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* The mult and add pointers are no longer needed, so x4-x9 are reloaded
 * with the first six entries of the fns table.
 * NOTE(review): the setup routine stores four column routines followed by
 * a store routine and a load routine, so x4-x7 are presumably the columns,
 * x8 the store and x9 the load -- confirm against the setup code.
 */
9850462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            ldp         x4,x5, [x3],#16
9860462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            ldp         x6,x7, [x3],#16
9870462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            ldp         x8,x9, [x3],#16
9880462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* Broadcast each addend, shift it right by eight with unsigned
 * saturation, and narrow it so that all eight 16-bit lanes of v8-v11 hold
 * the per-channel addend.  v12-v15 are only temporaries here.
 */
9890462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            dup         v12.4s, v4.s[0]
9900462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            dup         v13.4s, v4.s[1]
9910462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            dup         v14.4s, v4.s[2]
9920462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            dup         v15.4s, v4.s[3]
9930462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sqshrun     v8.4h, v12.4s, #8
9940462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sqshrun2    v8.8h, v12.4s, #8
9950462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sqshrun     v9.4h, v13.4s, #8
9960462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sqshrun2    v9.8h, v13.4s, #8
9970462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sqshrun     v10.4h, v14.4s, #8
9980462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sqshrun2    v10.8h, v14.4s, #8
9990462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sqshrun     v11.4h, v15.4s, #8
10000462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            sqshrun2    v11.8h, v15.4s, #8
10010462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* Bias the count by eight.  With fewer than eight elements go straight to
 * the tail handling, otherwise enter the routine chain through x9.
 */
10020462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            subs        x2, x2, #8
10030462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            blo         colormatrix_int_end
10040462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            br          x9
10050462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* Tail handling.  Undo the bias and leave if nothing remains.  Otherwise
 * remember the current x8 in x16, load the next two fns entries into x8
 * and x9, and redirect every one of x4-x7 that equalled the old x8 to the
 * new x8 before jumping to the new x9.
 * NOTE(review): the new pair is presumably the partial store and partial
 * load routines selected by the low bits of x2 -- confirm.
 */
10060462a39371659d1eeed5eb48dd6d507760301c22Simon Hosiecolormatrix_int_end:
10076e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            adds        x2, x2, #8
10086e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            bls         colormatrix_int_realend
10096e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            mov         x16, x8
10106e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            ldp         x8, x9, [x3], #16
10116e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            cmp         x4, x16
10126e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            csel        x4, x8, x4, eq
10136e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            cmp         x5, x16
10146e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            csel        x5, x8, x5, eq
10156e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            cmp         x6, x16
10166e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            csel        x6, x8, x6, eq
10176e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            cmp         x7, x16
10186e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            csel        x7, x8, x7, eq
10196e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie            br          x9
10206e7e258316f72be95039278e88e3bc1daea1668fSimon Hosie
/* Common exit: reload v8-v15 from the frame, release it and return. */
10216e7e258316f72be95039278e88e3bc1daea1668fSimon Hosiecolormatrix_int_realend:
10220462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            ld1         {v8.1d-v11.1d}, [sp], #32
10230462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            ld1         {v12.1d-v15.1d}, [sp], #32
10240462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie            ret
10250462a39371659d1eeed5eb48dd6d507760301c22Simon HosieEND(rsdIntrinsicColorMatrix_int_K)
10260462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* void rsdIntrinsicColorMatrixSetup_int_K(
 *          fntab_t const *fns, // x0
 *          uint32_t mask,      // x1
 *          int dt,             // x2
 *          int st);            // x3
 *
 * Populates the eight code pointers at fns for the integer kernel:
 *   fns[0..3]  column routines chosen from mask, five bits per column
 *   fns[4]     store routine for destination type dt
 *   fns[5]     load routine for source type st
 *   fns[6]     "_end" variant of the store routine
 *   fns[7]     "_end" variant of the load routine
 *
 * All lookup tables below hold signed 16-bit offsets relative to the table
 * label, so the routine stays position independent.
 * Clobbers x0-x5, x7 and the condition flags.
 */
ENTRY(rsdIntrinsicColorMatrixSetup_int_K)
/* dt and st are C ints: only w2/w3 are defined on entry, so sign-extend them
 * while scaling by four (one table row is a pair of halfwords). */
            adr         x7, 2f
            add         x4, x7, w2, SXTW #2
            ldrsh       x2, [x4], #2            /* offset of store routine */
            ldrsh       x4, [x4]                /* offset of its _end variant */
            add         x2, x2, x7
            add         x4, x4, x7
            adr         x7, 3f
            add         x5, x7, w3, SXTW #2
            ldrsh       x3, [x5], #2            /* offset of load routine */
            ldrsh       x5, [x5]                /* offset of its _end variant */
            add         x3, x3, x7
            add         x5, x5, x7
            stp         x2, x3, [x0, #32]       /* fns[4], fns[5] */
            stp         x4, x5, [x0, #48]       /* fns[6], fns[7] */

/* For each column function, if the low four mask bits of that column are all
 * zero then write NULL, otherwise use the low five bits as a row index into
 * table 4 and store the routine found there.
 *
 * x7 advances by one halfword per column; the table entries are biased by
 * the same amount (-2, -4, -6), so adding x7 back always yields the absolute
 * address.  Only bits 0-19 of x1 are examined, so the undefined upper half of
 * the register cannot influence the result. */

            mov         x3, #4
            adr         x7, 4f
1:          ands        x2, x1, #15
            beq         9f                      /* x2 == 0 here: store NULL */
            and         x2, x1, #31
            lsl         x2, x2, #3              /* eight bytes per table row */
            ldrsh       x2, [x7, x2]
            add         x2, x2, x7
9:          str         x2, [x0], #8
            lsr         x1, x1, #5
            add         x7, x7, #2
            subs        x3, x3, #1
            bne         1b

/* For every NULL entry, copy the non-NULL entry that follows it, or the store
 * function.  x0 now points at fns[4]; walk back down to fns[0]. */

            ldr         x2, [x0]
            mov         x3, #4
1:          ldr         x1, [x0, #-8]!
            cmp         x1, #0
            csel        x2, x1, x2, ne          /* keep successor when NULL */
            str         x2, [x0]
            subs        x3, x3, #1
            bne         1b
            ret

/* Table 2: store routines, indexed by dt.  Table 3: load routines, indexed by
 * st.  Each row is { routine, routine_end }. */
            .align 4
2:          .hword      colormatrix_int_stu1-2b
            .hword      colormatrix_int_stu1_end-2b
            .hword      colormatrix_int_stu2-2b
            .hword      colormatrix_int_stu2_end-2b
            .hword      colormatrix_int_stu3-2b
            .hword      colormatrix_int_stu3_end-2b
            .hword      colormatrix_int_stu4-2b
            .hword      colormatrix_int_stu4_end-2b
3:          .hword      colormatrix_int_ldu1-3b
            .hword      colormatrix_int_ldu1_end-3b
            .hword      colormatrix_int_ldu2-3b
            .hword      colormatrix_int_ldu2_end-3b
            .hword      colormatrix_int_ldu3-3b
            .hword      colormatrix_int_ldu3_end-3b
            .hword      colormatrix_int_ldu4-3b
            .hword      colormatrix_int_ldu4_end-3b
/* Table 4: 32 rows of four column routines.  Rows 0-15 are the plain
 * variants, rows 16-31 are the _n15 down to _n0 variants. */
4:
.irp i, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
            .hword      colormatrix_int_col0_\i-4b
            .hword      colormatrix_int_col1_\i-4b-2
            .hword      colormatrix_int_col2_\i-4b-4
            .hword      colormatrix_int_col3_\i-4b-6
.endr
.irp i, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
            .hword      colormatrix_int_col0_n\i-4b
            .hword      colormatrix_int_col1_n\i-4b-2
            .hword      colormatrix_int_col2_n\i-4b-4
            .hword      colormatrix_int_col3_n\i-4b-6
.endr
END(rsdIntrinsicColorMatrixSetup_int_K)
11100462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
11110462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* void rsdIntrinsicColorMatrix_float_K(
 *          void *out,              // x0
 *          void const *in,         // x1
 *          size_t count,           // x2
 *          fntab_t const *fns,     // x3
 *          float const *mult,      // x4
 *          float const *add);      // x5
 *
 * Entry point of the float colour-matrix kernel.  The real work is done by
 * the routines whose addresses are held in fns (filled in by
 * rsdIntrinsicColorMatrixSetup_float_K); this function only sets up
 * registers and dispatches:
 *   x4-x7   fns[0..3]  column routines
 *   x8      fns[4]     store routine
 *   x9      fns[5]     load routine
 *   v0-v3   sixteen floats read from mult
 *   v4-v7   four floats read from add, each replicated across a vector
 * NOTE(review): the dispatched routines live outside this block; presumably
 * they chain load -> columns -> store and come back to colormatrix_float_end
 * when fewer than eight pixels remain - confirm against their definitions.
 */
ENTRY(rsdIntrinsicColorMatrix_float_K)
/* Spill d8-d15 (callee-saved under AAPCS64) into a 64-byte frame.  x7 is the
 * address of the upper half of that frame. */
            sub         x7, sp, #32
            sub         sp, sp, #64
            st1         {v8.1d-v11.1d}, [sp]
            st1         {v12.1d-v15.1d}, [x7]

            ld1         {v0.4s,v1.4s,v2.4s,v3.4s}, [x4], #64
            ld1r        {v4.4s}, [x5], #4
            ld1r        {v5.4s}, [x5], #4
            ld1r        {v6.4s}, [x5], #4
            ld1r        {v7.4s}, [x5], #4

            ldp         x4,x5, [x3], #16        /* fns[0], fns[1] */
            ldp         x6,x7, [x3], #16        /* fns[2], fns[3] */
            ldp         x8,x9, [x3], #16        /* fns[4], fns[5] */

/* Copy the replicated add terms into v8-v11 and v16-v19.
 * NOTE(review): presumably these seed the accumulators used by the column
 * routines - confirm. */
            mov         v8.16b, v4.16b
            mov         v9.16b, v5.16b
            mov         v10.16b, v6.16b
            mov         v11.16b, v7.16b

            mov         v16.16b, v4.16b
            mov         v17.16b, v5.16b
            mov         v18.16b, v6.16b
            mov         v19.16b, v7.16b

/* With at least eight pixels, enter the chain at the load routine; otherwise
 * drop straight into the tail handling. */
            subs        x2, x2, #8
            blo         colormatrix_float_end
            br          x9

/* Tail: x2 holds (remaining - 8).  Undo the bias and return through this
 * function's own epilogue when nothing is left. */
colormatrix_float_end:
            adds        x2, x2, #8
            bls         colormatrix_float_realend
/* Switch to the _end variants: x8/x9 become fns[6]/fns[7].  The setup routine
 * back-fills unused column slots with the store routine, so any of x4-x7
 * that still equals the old store address must follow the switch too. */
            mov         x16, x8
            ldp         x8,x9, [x3], #16
            cmp         x4, x16
            csel        x4, x8, x4, eq
            cmp         x5, x16
            csel        x5, x8, x5, eq
            cmp         x6, x16
            csel        x6, x8, x6, eq
            cmp         x7, x16
            csel        x7, x8, x7, eq
            br          x9

/* Epilogue: reload d8-d15 and release the 64-byte frame. */
colormatrix_float_realend:
            ld1         {v8.1d-v11.1d}, [sp], #32
            ld1         {v12.1d-v15.1d}, [sp], #32
            ret
END(rsdIntrinsicColorMatrix_float_K)
11700462a39371659d1eeed5eb48dd6d507760301c22Simon Hosie
/* void rsdIntrinsicColorMatrixSetup_float_K(
 *          fntab_t const *fns, // x0
 *          uint32_t mask,      // x1
 *          int dt,             // x2
 *          int st);            // x3
 *
 * Populates the eight code pointers at fns for the float kernel:
 *   fns[0..3]  column routines chosen from mask, five bits per column
 *   fns[4]     store routine for destination type dt
 *   fns[5]     load routine for source type st
 *   fns[6]     "_end" variant of the store routine
 *   fns[7]     "_end" variant of the load routine
 *
 * dt and st each index an eight-row table: rows 0-3 are the stu/ldu
 * routines and rows 4-7 the stf/ldf routines.  All tables hold signed 16-bit
 * offsets relative to the table label, so the routine stays position
 * independent.
 * Clobbers x0-x5, x7 and the condition flags.
 */
ENTRY(rsdIntrinsicColorMatrixSetup_float_K)
/* dt and st are C ints: only w2/w3 are defined on entry, so sign-extend them
 * while scaling by four (one table row is a pair of halfwords). */
            adr         x7, 2f
            add         x4, x7, w2, SXTW #2
            ldrsh       x2, [x4], #2            /* offset of store routine */
            ldrsh       x4, [x4]                /* offset of its _end variant */
            add         x2, x2, x7
            add         x4, x4, x7
            adr         x7, 3f
            add         x5, x7, w3, SXTW #2
            ldrsh       x3, [x5], #2            /* offset of load routine */
            ldrsh       x5, [x5]                /* offset of its _end variant */
            add         x3, x3, x7
            add         x5, x5, x7
            stp         x2, x3, [x0, #32]       /* fns[4], fns[5] */
            stp         x4, x5, [x0, #48]       /* fns[6], fns[7] */

/* For each column function, if the low four mask bits of that column are all
 * zero then write NULL, otherwise use the low five bits as a row index into
 * table 4 and store the routine found there.
 *
 * x7 advances by one halfword per column; the table entries are biased by
 * the same amount (-2, -4, -6), so adding x7 back always yields the absolute
 * address.  Only bits 0-19 of x1 are examined, so the undefined upper half of
 * the register cannot influence the result. */

            mov         x3, #4
            adr         x7, 4f
1:          ands        x2, x1, #15
            beq         9f                      /* x2 == 0 here: store NULL */
            and         x2, x1, #31
            lsl         x2, x2, #3              /* eight bytes per table row */
            ldrsh       x2, [x7, x2]
            add         x2, x2, x7
9:          str         x2, [x0], #8
            lsr         x1, x1, #5
            add         x7, x7, #2
            subs        x3, x3, #1
            bne         1b

/* For every NULL entry, copy the non-NULL entry that follows it, or the store
 * function.  x0 now points at fns[4]; walk back down to fns[0]. */

            ldr         x2, [x0]
            mov         x3, #4
1:          ldr         x1, [x0, #-8]!
            cmp         x1, #0
            csel        x2, x1, x2, ne          /* keep successor when NULL */
            str         x2, [x0]
            subs        x3, x3, #1
            bne         1b
            ret

/* Table 2: store routines, indexed by dt.  Table 3: load routines, indexed by
 * st.  Each row is { routine, routine_end }. */
            .align 4
2:          .hword      colormatrix_float_stu1-2b
            .hword      colormatrix_float_stu1_end-2b
            .hword      colormatrix_float_stu2-2b
            .hword      colormatrix_float_stu2_end-2b
            .hword      colormatrix_float_stu3-2b
            .hword      colormatrix_float_stu3_end-2b
            .hword      colormatrix_float_stu4-2b
            .hword      colormatrix_float_stu4_end-2b
            .hword      colormatrix_float_stf1-2b
            .hword      colormatrix_float_stf1_end-2b
            .hword      colormatrix_float_stf2-2b
            .hword      colormatrix_float_stf2_end-2b
            .hword      colormatrix_float_stf3-2b
            .hword      colormatrix_float_stf3_end-2b
            .hword      colormatrix_float_stf4-2b
            .hword      colormatrix_float_stf4_end-2b
3:          .hword      colormatrix_float_ldu1-3b
            .hword      colormatrix_float_ldu1_end-3b
            .hword      colormatrix_float_ldu2-3b
            .hword      colormatrix_float_ldu2_end-3b
            .hword      colormatrix_float_ldu3-3b
            .hword      colormatrix_float_ldu3_end-3b
            .hword      colormatrix_float_ldu4-3b
            .hword      colormatrix_float_ldu4_end-3b
            .hword      colormatrix_float_ldf1-3b
            .hword      colormatrix_float_ldf1_end-3b
            .hword      colormatrix_float_ldf2-3b
            .hword      colormatrix_float_ldf2_end-3b
            .hword      colormatrix_float_ldf3-3b
            .hword      colormatrix_float_ldf3_end-3b
            .hword      colormatrix_float_ldf4-3b
            .hword      colormatrix_float_ldf4_end-3b
/* Table 4: 32 rows of four column routines.  Rows 0-15 are the plain
 * variants, rows 16-31 are the _n15 down to _n0 variants. */
4:
.irp i, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
            .hword      colormatrix_float_col0_\i-4b
            .hword      colormatrix_float_col1_\i-4b-2
            .hword      colormatrix_float_col2_\i-4b-4
            .hword      colormatrix_float_col3_\i-4b-6
.endr
.irp i, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
            .hword      colormatrix_float_col0_n\i-4b
            .hword      colormatrix_float_col1_n\i-4b-2
            .hword      colormatrix_float_col2_n\i-4b-4
            .hword      colormatrix_float_col3_n\i-4b-6
.endr
END(rsdIntrinsicColorMatrixSetup_float_K)
1270