/*
 * Copyright (C) 2013-2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
165d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* ENTRY(f): switch to .text, align (on AArch64 gas .align takes a power-of-two
 *           exponent, so 4 means 16 bytes), export f, mark it as a function
 *           symbol and emit its label.
 * END(f):   record the size of f; use it right after the last instruction.
 */
#define ENTRY(f) \
        .text; \
        .align 4; \
        .globl f; \
        .type f,#function; \
        f:
#define END(f) \
        .size f, .-f;
195d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* X-macro table of the blend modes handled by this file, one X(id, NAME) entry
 * per mode.  NAME is the suffix of the corresponding params_NAME /
 * blend_kernel_NAME pair.
 * NOTE(review): id is presumably the caller-visible mode number, with the gaps
 * being modes that have no kernel here -- confirm against the code that
 * expands BLEND_LIST.
 */
#define BLEND_LIST(X)   \
    X( 0, CLEAR)        \
    X( 1, SRC)          \
    X( 2, DST)          \
    X( 3, SRC_OVER)     \
    X( 4, DST_OVER)     \
    X( 5, SRC_IN)       \
    X( 6, DST_IN)       \
    X( 7, SRC_OUT)      \
    X( 8, DST_OUT)      \
    X( 9, SRC_ATOP)     \
    X(10, DST_ATOP)     \
    X(11, XOR)          \
    X(14, MULTIPLY)     \
    X(21, DIFFERENCE)   \
    X(34, ADD)          \
    X(35, SUBTRACT)
375d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* For every blend operation supported, define a macro with just the arithmetic
 * component.  The rest can be handled later on.
 *
 * At entry v0-v3 contain the RGBA data from the destination buffer, and v8-v11
 * contain the data from the source buffer.  Both have already been split out
 * into one colour component per register (if necessary).  v3 and v11 contain
 * the alpha components.
 *
 * At the same time as defining the assembly macro, define a corresponding
 * preprocessor macro indicating any other requirements.
 *    zipped=0 -- The macro does not require the RGBA components to be
 *                separated.
 *    lddst=0  -- The macro does not require data from the destination buffer.
 *    ldsrc=0  -- The macro does not require data from the source buffer.
 *    nowrap=1 -- The macro requires no wrapper at all, and should simply be
 *                inserted without any surrounding load/store or loop code.
 */
555d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* CLEAR: every output byte is zero.  Neither buffer's data is needed and the
 * channels do not have to be separated.
 * Out: v0-v3 (all zero).
 */
#define params_CLEAR zipped=0, lddst=0, ldsrc=0
.macro blend_kernel_CLEAR
        /* Expands to one movi per output register, v0 through v3 in order. */
        .irp    out, v0.16b, v1.16b, v2.16b, v3.16b
        movi    \out, #0
        .endr
.endm
635d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* SRC: the output is the source data unchanged.  The destination data is not
 * needed and the channels do not have to be separated.
 * In:  v8-v11 source.
 * Out: v0-v3.
 */
#define params_SRC zipped=0, lddst=0
.macro blend_kernel_SRC
        mov     v0.16b, v8.16b
        mov     v1.16b, v9.16b
        mov     v2.16b, v10.16b
        mov     v3.16b, v11.16b
.endm
715d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* DST: the destination is left exactly as it is, so the kernel is empty and
 * (nowrap=1) is emitted without any load/store or loop code around it.
 */
#define params_DST nowrap=1
.macro blend_kernel_DST
        /* intentionally empty: nothing to compute */
.endm
765d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* SRC_OVER: out = src + dst * (255 - src.a) / 255, for all four channels.
 *
 * In:      v0-v3 destination channels (v3 = alpha),
 *          v8-v11 source channels (v11 = alpha), one channel per register.
 * Out:     v0-v3.
 * Scratch: v4-v7, v12-v15.  (v8-v15 are callee-saved under AAPCS64, so the
 *          enclosing function has to preserve them.)
 */
#define params_SRC_OVER zipped=1
.macro blend_kernel_SRC_OVER
        /* v7 = 255 - src.a */
        mvn         v7.16b, v11.16b

        /* x = dst * (255 - src.a) as 16-bit products.  The upper eight lanes
         * go to v12-v15 first, because the lower-lane product overwrites the
         * destination register it reads from.
         */
        umull2      v12.8h, v7.16b, v0.16b
        umull       v0.8h,  v7.8b,  v0.8b
        umull2      v13.8h, v7.16b, v1.16b
        umull       v1.8h,  v7.8b,  v1.8b
        umull2      v14.8h, v7.16b, v2.16b
        umull       v2.8h,  v7.8b,  v2.8b
        umull2      v15.8h, v7.16b, v3.16b
        umull       v3.8h,  v7.8b,  v3.8b

        /* t = (x + 128) >> 8, narrowed to bytes in v4-v7 (v7 is free again). */
        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        /* x += t */
        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        /* (x + 128) >> 8, narrowed back into v0-v3: the product divided by
         * 255 with rounding.
         */
        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8

        /* Add the source on top, saturating at 255. */
        uqadd       v0.16b, v0.16b, v8.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v3.16b, v3.16b, v11.16b
.endm
1225d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* DST_OVER: out = dst + src * (255 - dst.a) / 255, for all four channels.
 *
 * In:      v0-v3 destination channels (v3 = alpha),
 *          v8-v11 source channels (v11 = alpha), one channel per register.
 * Out:     v0-v3.
 * Clobber: v8-v11 (replaced by the scaled source), v4-v7, v12-v15.
 *          (v8-v15 are callee-saved under AAPCS64, so the enclosing function
 *          has to preserve them.)
 */
#define params_DST_OVER zipped=1
.macro blend_kernel_DST_OVER
        /* v7 = 255 - dst.a */
        mvn         v7.16b, v3.16b

        /* x = src * (255 - dst.a) as 16-bit products.  The upper eight lanes
         * go to v12-v15 first, because the lower-lane product overwrites the
         * source register it reads from.
         */
        umull2      v12.8h, v7.16b, v8.16b
        umull       v8.8h,  v7.8b,  v8.8b
        umull2      v13.8h, v7.16b, v9.16b
        umull       v9.8h,  v7.8b,  v9.8b
        umull2      v14.8h, v7.16b, v10.16b
        umull       v10.8h, v7.8b,  v10.8b
        umull2      v15.8h, v7.16b, v11.16b
        umull       v11.8h, v7.8b,  v11.8b

        /* t = (x + 128) >> 8, narrowed to bytes in v4-v7 (v7 is free again). */
        rshrn       v4.8b,  v8.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v9.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v10.8h, #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v11.8h, #8
        rshrn2      v7.16b, v15.8h, #8

        /* x += t */
        uaddw       v8.8h,  v8.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v9.8h,  v9.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v10.8h, v10.8h, v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v11.8h, v11.8h, v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        /* (x + 128) >> 8, narrowed back into v8-v11: the product divided by
         * 255 with rounding.
         */
        rshrn       v8.8b,   v8.8h,  #8
        rshrn2      v8.16b,  v12.8h, #8
        rshrn       v9.8b,   v9.8h,  #8
        rshrn2      v9.16b,  v13.8h, #8
        rshrn       v10.8b,  v10.8h, #8
        rshrn2      v10.16b, v14.8h, #8
        rshrn       v11.8b,  v11.8h, #8
        rshrn2      v11.16b, v15.8h, #8

        /* Add the scaled source to the destination, saturating at 255. */
        uqadd       v0.16b, v0.16b, v8.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v3.16b, v3.16b, v11.16b
.endm
1685d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* SRC_IN: out = src * dst.a / 255, for all four channels.
 *
 * In:      v3 destination alpha (v0-v2 are overwritten without being read),
 *          v8-v11 source channels (v11 = alpha), one channel per register.
 * Out:     v0-v3.
 * Scratch: v4-v7, v12-v15.  (v8-v15 are callee-saved under AAPCS64, so the
 *          enclosing function has to preserve them.)
 */
#define params_SRC_IN zipped=1
.macro blend_kernel_SRC_IN
        /* x = src * dst.a as 16-bit products; upper eight lanes in v12-v15,
         * lower eight in v0-v3.  v3 is both the multiplier and an output, so
         * its own lower-lane product is issued last.
         */
        umull2      v12.8h, v3.16b, v8.16b
        umull       v0.8h,  v3.8b,  v8.8b
        umull2      v13.8h, v3.16b, v9.16b
        umull       v1.8h,  v3.8b,  v9.8b
        umull2      v14.8h, v3.16b, v10.16b
        umull       v2.8h,  v3.8b,  v10.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        /* t = (x + 128) >> 8, narrowed to bytes in v4-v7. */
        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        /* x += t */
        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        /* (x + 128) >> 8, narrowed back into v0-v3: the product divided by
         * 255 with rounding.
         */
        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm
2075d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* DST_IN: out = dst * src.a / 255, for all four channels.
 *
 * In:      v0-v3 destination channels (v3 = alpha), v11 source alpha
 *          (v8-v10 are not used), one channel per register.
 * Out:     v0-v3.
 * Scratch: v4-v7, v12-v15.  (v8-v15 are callee-saved under AAPCS64, so the
 *          enclosing function has to preserve them.)
 */
#define params_DST_IN zipped=1
.macro blend_kernel_DST_IN
        /* x = dst * src.a as 16-bit products.  The upper eight lanes go to
         * v12-v15 first, because the lower-lane product overwrites the
         * destination register it reads from.
         */
        umull2      v12.8h, v0.16b, v11.16b
        umull       v0.8h,  v0.8b,  v11.8b
        umull2      v13.8h, v1.16b, v11.16b
        umull       v1.8h,  v1.8b,  v11.8b
        umull2      v14.8h, v2.16b, v11.16b
        umull       v2.8h,  v2.8b,  v11.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        /* t = (x + 128) >> 8, narrowed to bytes in v4-v7. */
        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        /* x += t */
        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        /* (x + 128) >> 8, narrowed back into v0-v3: the product divided by
         * 255 with rounding.
         */
        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm
2465d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* SRC_OUT: out = src * (255 - dst.a) / 255.
 * Implemented as SRC_IN with the destination alpha (v3) inverted in place
 * first; inputs, outputs and scratch registers are those of
 * blend_kernel_SRC_IN.
 */
#define params_SRC_OUT zipped=1
.macro blend_kernel_SRC_OUT
        /* v3 = 255 - dst.a */
        mvn         v3.16b, v3.16b
        blend_kernel_SRC_IN
.endm
2525d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
2535d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* DST_OUT: out = dst * (255 - src.a) / 255.
 * Implemented as DST_IN with the source alpha (v11) inverted in place first,
 * so v11 no longer holds the source alpha afterwards; the other inputs,
 * outputs and scratch registers are those of blend_kernel_DST_IN.
 */
#define params_DST_OUT zipped=1
.macro blend_kernel_DST_OUT
        /* v11 = 255 - src.a */
        mvn         v11.16b, v11.16b
        blend_kernel_DST_IN
.endm
2595d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* SRC_ATOP: out.rgb = (src.rgb * dst.a + dst.rgb * (255 - src.a)) / 255
 *           out.a   = dst.a   (v3 is only read, never written)
 *
 * In:      v0-v3 destination channels (v3 = alpha),
 *          v8-v11 source channels (v11 = alpha), one channel per register.
 * Out:     v0-v2 colour; v3 unchanged.
 * Clobber: v4-v6, v8-v10, v12-v14; v11 is left inverted.
 *          (v8-v15 are callee-saved under AAPCS64, so the enclosing function
 *          has to preserve them.)
 */
#define params_SRC_ATOP zipped=1
.macro blend_kernel_SRC_ATOP
        /* v11 = 255 - src.a */
        mvn         v11.16b, v11.16b

        /* dst.rgb * (255 - src.a): upper lanes in v12-v14, lower in v0-v2. */
        umull2      v12.8h, v11.16b, v0.16b
        umull       v0.8h,  v11.8b,  v0.8b
        umull2      v13.8h, v11.16b, v1.16b
        umull       v1.8h,  v11.8b,  v1.8b
        umull2      v14.8h, v11.16b, v2.16b
        umull       v2.8h,  v11.8b,  v2.8b

        /* src.rgb * dst.a: upper lanes in v4-v6, lower in v8-v10. */
        umull2      v4.8h,  v3.16b, v8.16b
        umull       v8.8h,  v3.8b,  v8.8b
        umull2      v5.8h,  v3.16b, v9.16b
        umull       v9.8h,  v3.8b,  v9.8b
        umull2      v6.8h,  v3.16b, v10.16b
        umull       v10.8h, v3.8b,  v10.8b

        /* x = sum of the two products, saturating in 16 bits. */
        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v14.8h, v14.8h, v6.8h
        uqadd       v2.8h,  v2.8h,  v10.8h

        /* t = (x + 128) >> 8, kept as 16-bit lanes. */
        urshr       v8.8h,  v0.8h,  #8
        urshr       v4.8h,  v12.8h, #8
        urshr       v9.8h,  v1.8h,  #8
        urshr       v5.8h,  v13.8h, #8
        urshr       v10.8h, v2.8h,  #8
        urshr       v6.8h,  v14.8h, #8

        /* x += t, saturating. */
        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v2.8h,  v2.8h,  v10.8h
        uqadd       v14.8h, v14.8h, v6.8h

        /* (x + 128) >> 8 with a saturating narrow back to bytes in v0-v2. */
        uqrshrn     v0.8b,  v0.8h,  #8
        uqrshrn2    v0.16b, v12.8h, #8
        uqrshrn     v1.8b,  v1.8h,  #8
        uqrshrn2    v1.16b, v13.8h, #8
        uqrshrn     v2.8b,  v2.8h,  #8
        uqrshrn2    v2.16b, v14.8h, #8
.endm
3065d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* DST_ATOP: out.rgb = (dst.rgb * src.a + src.rgb * (255 - dst.a)) / 255
 *           out.a   = src.a
 *
 * In:      v0-v3 destination channels (v3 = alpha),
 *          v8-v11 source channels (v11 = alpha), one channel per register.
 * Out:     v0-v3.
 * Clobber: v4-v6, v8-v10, v12-v14; v11 is only read.
 *          (v8-v15 are callee-saved under AAPCS64, so the enclosing function
 *          has to preserve them.)
 */
#define params_DST_ATOP zipped=1
.macro blend_kernel_DST_ATOP
        /* v3 = 255 - dst.a */
        mvn         v3.16b, v3.16b

        /* dst.rgb * src.a: upper lanes in v12-v14, lower in v0-v2. */
        umull2      v12.8h, v11.16b, v0.16b
        umull       v0.8h,  v11.8b,  v0.8b
        umull2      v13.8h, v11.16b, v1.16b
        umull       v1.8h,  v11.8b,  v1.8b
        umull2      v14.8h, v11.16b, v2.16b
        umull       v2.8h,  v11.8b,  v2.8b

        /* src.rgb * (255 - dst.a): upper lanes in v4-v6, lower in v8-v10. */
        umull2      v4.8h,  v3.16b, v8.16b
        umull       v8.8h,  v3.8b,  v8.8b
        umull2      v5.8h,  v3.16b, v9.16b
        umull       v9.8h,  v3.8b,  v9.8b
        umull2      v6.8h,  v3.16b, v10.16b
        umull       v10.8h, v3.8b,  v10.8b

        /* x = sum of the two products, saturating in 16 bits. */
        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v14.8h, v14.8h, v6.8h
        uqadd       v2.8h,  v2.8h,  v10.8h

        /* t = (x + 128) >> 8, kept as 16-bit lanes. */
        urshr       v8.8h,  v0.8h,  #8
        urshr       v4.8h,  v12.8h, #8
        urshr       v9.8h,  v1.8h,  #8
        urshr       v5.8h,  v13.8h, #8
        urshr       v10.8h, v2.8h,  #8
        urshr       v6.8h,  v14.8h, #8

        /* x += t, saturating. */
        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v2.8h,  v2.8h,  v10.8h
        uqadd       v14.8h, v14.8h, v6.8h

        /* (x + 128) >> 8 with a saturating narrow back to bytes in v0-v2. */
        uqrshrn     v0.8b,  v0.8h,  #8
        uqrshrn2    v0.16b, v12.8h, #8
        uqrshrn     v1.8b,  v1.8h,  #8
        uqrshrn2    v1.16b, v13.8h, #8
        uqrshrn     v2.8b,  v2.8h,  #8
        uqrshrn2    v2.16b, v14.8h, #8

        /* DST_ATOP takes its alpha from the source.  v11 has only been read
         * above, so it still holds src.a.  Re-inverting v3 here instead would
         * hand back the destination alpha.
         */
        mov         v3.16b, v11.16b
.endm
3555d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* MULTIPLY: every byte lane becomes the product of the destination byte and
 * the source byte, scaled back down to 8 bits.
 *
 *   In:      v0-v3   destination bytes
 *            v8-v11  source bytes (left unmodified)
 *   Out:     v0-v3   blended bytes
 *   Scratch: v4-v7, v12-v15
 *
 * With p = dst * src as a 16-bit value, the result written back is
 *   (p + ((p + 128) >> 8) + 128) >> 8
 * which is the usual rounded stand-in for p / 255.
 *
 * params_MULTIPLY supplies the extra wrap_line arguments for this kernel:
 * the bytes are not de-interleaved because all four registers get the same
 * treatment.
 */
#define params_MULTIPLY zipped=0
.macro blend_kernel_MULTIPLY
        /* p = dst * src, widened to 16 bits.  The upper-half product of each
         * register is formed first because the lower-half multiply overwrites
         * the destination register it reads from.
         */
        umull2      v12.8h, v0.16b, v8.16b
        umull       v0.8h,  v0.8b,  v8.8b
        umull2      v13.8h, v1.16b, v9.16b
        umull       v1.8h,  v1.8b,  v9.8b
        umull2      v14.8h, v2.16b, v10.16b
        umull       v2.8h,  v2.8b,  v10.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        /* t = (p + 128) >> 8, narrowed to bytes in v4-v7. */
        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        /* p += t, with t zero-extended back to 16 bits. */
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v15.8h, v15.8h, v7.16b
        uaddw       v3.8h,  v3.8h,  v7.8b

        /* result = (p + 128) >> 8, narrowed back into v0-v3.  The lower half
         * is written first; the "2" form then fills the upper half in place.
         */
        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm
3945d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* ADD: per-byte unsigned saturating dst + src.
 *
 *   In:   v0-v3   destination bytes
 *         v8-v11  source bytes (left unmodified)
 *   Out:  v0-v3   sums, clamped at 255 by the saturating add
 *
 * No scratch registers are used.  params_ADD supplies the extra wrap_line
 * arguments: no de-interleaving, since every byte is treated alike.
 */
#define params_ADD zipped=0
.macro blend_kernel_ADD
        uqadd   v3.16b, v3.16b, v11.16b
        uqadd   v2.16b, v2.16b, v10.16b
        uqadd   v1.16b, v1.16b, v9.16b
        uqadd   v0.16b, v0.16b, v8.16b
.endm
4025d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* SUBTRACT: per-byte unsigned saturating dst - src.
 *
 *   In:   v0-v3   destination bytes (the minuend)
 *         v8-v11  source bytes (the subtrahend, left unmodified)
 *   Out:  v0-v3   differences, clamped at 0 by the saturating subtract
 *
 * No scratch registers are used.  params_SUBTRACT supplies the extra
 * wrap_line arguments: no de-interleaving, since every byte is treated alike.
 */
#define params_SUBTRACT zipped=0
.macro blend_kernel_SUBTRACT
        uqsub   v3.16b, v3.16b, v11.16b
        uqsub   v2.16b, v2.16b, v10.16b
        uqsub   v1.16b, v1.16b, v9.16b
        uqsub   v0.16b, v0.16b, v8.16b
.endm
4105d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* DIFFERENCE: per-byte unsigned absolute difference |dst - src|.
 *
 *   In:   v0-v3   destination bytes
 *         v8-v11  source bytes (left unmodified)
 *   Out:  v0-v3   absolute differences
 *
 * No scratch registers are used.  params_DIFFERENCE supplies the extra
 * wrap_line arguments: no de-interleaving, since every byte is treated alike.
 */
#define params_DIFFERENCE zipped=0
.macro blend_kernel_DIFFERENCE
        uabd    v3.16b, v3.16b, v11.16b
        uabd    v2.16b, v2.16b, v10.16b
        uabd    v1.16b, v1.16b, v9.16b
        uabd    v0.16b, v0.16b, v8.16b
.endm
4185d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* XOR: bitwise exclusive-or of the destination bytes with the source bytes.
 *
 *   In:   v0-v3   destination bytes
 *         v8-v11  source bytes (left unmodified)
 *   Out:  v0-v3   dst ^ src
 *
 * No scratch registers are used.  params_XOR supplies the extra wrap_line
 * arguments: no de-interleaving, since every byte is treated alike.
 */
#define params_XOR zipped=0
.macro blend_kernel_XOR
        eor     v3.16b, v3.16b, v11.16b
        eor     v2.16b, v2.16b, v10.16b
        eor     v1.16b, v1.16b, v9.16b
        eor     v0.16b, v0.16b, v8.16b
.endm
4265d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
4275d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* wrap_line: the scaffolding shared by every blend_line_XX function.  It
 * loads the data, runs the kernel once per 64-byte chunk, stores the result
 * and then deals safely with whatever is left over at the end of the line.
 * Pieces that a particular operation does not need are left out at assembly
 * time through the macro arguments.
 *
 * Macro arguments:
 *   kernel  name of the blend_kernel_XX macro to expand
 *   nowrap  non-zero: expand only the kernel, with no loads, loop or stores
 *   zipped  non-zero: de-interleave the bytes four ways on load and
 *           re-interleave them on store; zero: plain linear load and store
 *   lddst   non-zero: load the destination bytes at x0 into v0-v3
 *   ldsrc   non-zero: load the source bytes at x1 into v8-v11
 *   pld     non-zero: enable the prefetch hints (currently compiled out)
 *
 * Registers on entry:
 *   x0 = destination pointer, x1 = source pointer, x2 = byte count
 *
 * The expansion finishes with x0 = 0 and a ret.  Unless nowrap is set, the
 * low halves of v8-v15 are saved on the stack first and restored before
 * returning, and x2 and x3 are overwritten.
 */
.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
.if \nowrap
        \kernel
.else
        /* Reserve 64 bytes of stack and spill d8-d15 into it. */
        sub     sp, sp, #64
        add     x3, sp, #32
        st1     {v8.1d - v11.1d}, [sp]
        st1     {v12.1d - v15.1d}, [x3]

        /* x2 runs 64 behind the true remainder, so that "bge" at label 2
         * means there is still a whole 64-byte chunk to process.
         */
        subs    x2, x2, #64
        b       2f
.align 4
1:
  .if \lddst
    .if \zipped
        ld4     {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
    .else
        ld1     {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
    .endif
  .endif
  .if \ldsrc
    .if \zipped
        ld4     {v8.16b, v9.16b, v10.16b, v11.16b}, [x1], #64
    .else
        ld1     {v8.16b, v9.16b, v10.16b, v11.16b}, [x1], #64
    .endif
  .endif
  .if \pld
#if 0 /* TODO: test this on real hardware */
    .if \lddst
        prfm    PLDL1STRM, [x0, #192]
    .endif
    .if \ldsrc
        prfm    PLDL1STRM, [x1, #192]
    .endif
#endif
  .endif

        \kernel

        /* The store leaves the flags alone, so the loop branch below still
         * tests the result of this subtraction.
         */
        subs    x2, x2, #64
  .if \zipped
        st4     {v0.16b - v3.16b}, [x0], #64
  .else
        st1     {v0.16b - v3.16b}, [x0], #64
  .endif

2:      bge     1b
        adds    x2, x2, #64             /* back to the true remainder, 0..63 */
        beq     2f

        /* Tail handling, for something less than 64 bytes.  Each set bit of
         * the remaining count selects one power-of-two sized piece, which is
         * loaded into its own fixed slot of the working registers.  Where a
         * piece lands does not matter: the stores further down use the very
         * same slots, and the operations never let a byte interact with its
         * neighbours.  Slots that receive no data stay zero.
         */
        movi    v0.16b, #0
        movi    v8.16b, #0
        movi    v1.16b, #0
        movi    v9.16b, #0
        movi    v2.16b, #0
        movi    v10.16b, #0
        movi    v3.16b, #0
        movi    v11.16b, #0

        tbz     x2, #5, 1f
  .if \lddst
        ld1     {v2.16b, v3.16b}, [x0], #32
  .endif
  .if \ldsrc
        ld1     {v10.16b, v11.16b}, [x1], #32
  .endif
1:      tbz     x2, #4, 1f
  .if \lddst
        ld1     {v1.16b}, [x0], #16
  .endif
  .if \ldsrc
        ld1     {v9.16b}, [x1], #16
  .endif
1:      tbz     x2, #3, 1f
  .if \lddst
        ld1     {v0.d}[1], [x0], #8
  .endif
  .if \ldsrc
        ld1     {v8.d}[1], [x1], #8
  .endif
1:      tbz     x2, #2, 1f
  .if \lddst
        ld1     {v0.s}[1], [x0], #4
  .endif
  .if \ldsrc
        ld1     {v8.s}[1], [x1], #4
  .endif
1:      tbz     x2, #1, 1f
  .if \lddst
        ld1     {v0.h}[1], [x0], #2
  .endif
  .if \ldsrc
        ld1     {v8.h}[1], [x1], #2
  .endif
1:      tbz     x2, #0, 1f
  .if \lddst
        ld1     {v0.b}[1], [x0], #1
  .endif
  .if \ldsrc
        ld1     {v8.b}[1], [x1], #1
  .endif
1:
  .if \lddst
        /* The destination loads advanced x0; rewind it for the stores. */
        sub     x0, x0, x2
  .endif

  .if \zipped
        /* The partial loads above cannot de-interleave bytes while filling
         * only part of a register, so the data arrived in linear order and
         * is separated into four byte planes by hand here.  v4-v7 are
         * scratch.
         */
        uzp1    v4.16b, v0.16b, v1.16b
        uzp1    v6.16b, v2.16b, v3.16b
        uzp2    v5.16b, v0.16b, v1.16b
        uzp2    v7.16b, v2.16b, v3.16b
        uzp1    v0.16b, v4.16b, v6.16b
        uzp1    v1.16b, v5.16b, v7.16b
        uzp2    v2.16b, v4.16b, v6.16b
        uzp2    v3.16b, v5.16b, v7.16b

        uzp1    v4.16b, v8.16b, v9.16b
        uzp1    v6.16b, v10.16b, v11.16b
        uzp2    v5.16b, v8.16b, v9.16b
        uzp2    v7.16b, v10.16b, v11.16b
        uzp1    v8.16b, v4.16b, v6.16b
        uzp1    v9.16b, v5.16b, v7.16b
        uzp2    v10.16b, v4.16b, v6.16b
        uzp2    v11.16b, v5.16b, v7.16b
  .endif

        \kernel

  .if \zipped
        /* Interleave the four planes back into linear order. */
        zip1    v4.16b, v0.16b, v2.16b
        zip1    v5.16b, v1.16b, v3.16b
        zip2    v6.16b, v0.16b, v2.16b
        zip2    v7.16b, v1.16b, v3.16b
        zip1    v0.16b, v4.16b, v5.16b
        zip2    v1.16b, v4.16b, v5.16b
        zip1    v2.16b, v6.16b, v7.16b
        zip2    v3.16b, v6.16b, v7.16b
  .endif

        /* Write each piece back out from the slot it was loaded into. */
        tbz     x2, #5, 1f
        st1     {v2.16b, v3.16b}, [x0], #32
1:      tbz     x2, #4, 1f
        st1     {v1.16b}, [x0], #16
1:      tbz     x2, #3, 1f
        st1     {v0.d}[1], [x0], #8
1:      tbz     x2, #2, 1f
        st1     {v0.s}[1], [x0], #4
1:      tbz     x2, #1, 1f
        st1     {v0.h}[1], [x0], #2
1:      tbz     x2, #0, 2f
        st1     {v0.b}[1], [x0], #1

        /* Restore d8-d15 and release the 64 bytes of stack. */
2:      ld1     {v8.1d - v11.1d}, [sp], #32
        ld1     {v12.1d - v15.1d}, [sp], #32
.endif
        mov     x0, #0
        ret
.endm
5725d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
5735d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* Emit one blend_line_XX() function per BLEND_LIST entry.  Each function is
 * an expansion of wrap_line, given the matching blend_kernel_XX operation
 * macro and that operation's params_XX, which carries the optional wrap_line
 * arguments used to drop work the operation does not need.  The slot number
 * (d) is not used here.
 */
#define BLEND_X(d, n) \
    ENTRY(blend_line_##n) ; \
        wrap_line blend_kernel_##n, params_##n ; \
    END(blend_line_##n) ;
    BLEND_LIST(BLEND_X)
#undef BLEND_X
5815d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* tablesize = (slot number of the last BLEND_LIST entry) + 1.  Every
 * expansion overwrites the previous value, so the last entry wins.  Because
 * the dispatch table below holds one 16-bit entry per slot number starting
 * at 0, this is the number of entries in that table, not its size in bytes.
 * BLEND_LIST is expected to be in ascending slot order; the table generator
 * below relies on the same thing.
 */
#define BLEND_X(d, n) .set tablesize, d+1 ;
    BLEND_LIST(BLEND_X)
#undef BLEND_X

/*  int rsdIntrinsicBlend_K(
 *          uchar4 *out,        // x0
 *          uchar4 const *in,   // x1
 *          int slot,           // x2
 *          size_t xstart,      // x3
 *          size_t xend);       // x4
 *
 * Looks up the blend_line_XX function for `slot` and tail-jumps to it with
 *   x0 = out + xstart, x1 = in + xstart   (4 bytes per element)
 *   x2 = (xend - xstart) * 4              (byte count)
 * Only the low 32 bits of xstart and xend are used.
 *
 * Returns -1 when slot is outside the table or has no implementation (a
 * zero table entry).  Otherwise the value comes from the blend_line_XX
 * function, which returns straight to the caller and leaves 0 in x0.
 */
ENTRY(rsdIntrinsicBlend_K)
    adr     x5, 2f
    /* tablesize already counts entries, so it is compared as-is.  The
     * unsigned test also rejects negative slot values.
     */
    cmp     w2, tablesize
    bhs     1f
    ldrsh   x6, [x5, w2, uxtw #1]       /* signed 16-bit offset from label 2 */
    add     x0, x0, w3, uxtw #2
    add     x1, x1, w3, uxtw #2
    sub     w2, w4, w3
    ubfiz   x2, x2, #2, #32 /* TODO: fix */
    cbz     x6, 1f                      /* zero entry: slot not implemented */
    add     x6, x5, x6
    br      x6                          /* tail jump; x30 is left untouched */
1:  mov     x0, #-1
    ret

/* Dispatch table: one halfword per slot number, holding the offset of the
 * blend_line_XX function from label 2.  Gaps between the slot numbers listed
 * in BLEND_LIST are filled with zero.
 */
2:
.set off,0
#define BLEND_X(d, n) .rept d-off ; .hword 0 ; .endr ; .hword blend_line_##n - 2b ; .set off, d+1 ;
        BLEND_LIST(BLEND_X)
#undef BLEND_X
3:

END(rsdIntrinsicBlend_K)
616