/*
 * Copyright (C) 2013-2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f): select .text, align to a 16-byte boundary, export f as a function
 * symbol, place the label and open its unwind region.
 * END(f):   close the unwind region and record the size of f.
 */
#define ENTRY(f) .text; .p2align 4; .globl f; .type f, %function; f: .fnstart
#define END(f) .fnend; .size f, .-f;

/* X-macro table of the blend operations: BLEND_LIST(X) expands to one
 * X(number, NAME) invocation per entry, in the order given below.
 * NOTE(review): the numbers are presumably the operation IDs supplied by the
 * caller -- confirm against the code that expands this list.
 */
#define BLEND_LIST(X)   \
    X(0,  CLEAR)        \
    X(1,  SRC)          \
    X(2,  DST)          \
    X(3,  SRC_OVER)     \
    X(4,  DST_OVER)     \
    X(5,  SRC_IN)       \
    X(6,  DST_IN)       \
    X(7,  SRC_OUT)      \
    X(8,  DST_OUT)      \
    X(9,  SRC_ATOP)     \
    X(10, DST_ATOP)     \
    X(11, XOR)          \
    X(14, MULTIPLY)     \
    X(21, DIFFERENCE)   \
    X(34, ADD)          \
    X(35, SUBTRACT)

/* Build attribute 25 (Tag_ABI_align8_preserved) = 1, then assemble everything
 * that follows as ARM (A32) instructions.
 */
.eabi_attribute 25, 1
.arm

/* For every blend operation supported, define a macro with just the arithmetic
 * component.  The rest can be handled later on.
 *
 * At entry q0-q3 contain the RGBA data from the destination buffer, and q8-q11
 * contain the data from the source buffer.  Both have already been split out
 * into one colour component per register (if necessary).  q3 and q11 contain
 * the alpha components.
 *
 * At the same time as defining the assembly macro, define a corresponding
 * preprocessor macro indicating any other requirements.
 *    zipped=0 -- The macro does not require the RGBA components to be
 *                separated.
 *    lddst=0  -- The macro does not require data from the destination buffer.
 *    ldsrc=0  -- The macro does not require data from the source buffer.
 *    nowrap=1 -- The macro requires no wrapper at all, and should simply be
 *                inserted without any surrounding load/store or loop code.
 */

/* CLEAR: every output byte becomes zero.  Neither buffer is read (lddst=0,
 * ldsrc=0) and the component layout is irrelevant (zipped=0).
 */
#define params_CLEAR zipped=0, lddst=0, ldsrc=0
.macro blend_kernel_CLEAR
        vmov.i8     q0, #0
        vmov.i8     q1, #0
        vmov.i8     q2, #0
        vmov.i8     q3, #0
.endm

/* SRC: the output is a straight copy of the source registers (q8-q11) into
 * the result registers (q0-q3).  Destination data is not needed (lddst=0)
 * and the bytes are copied verbatim, so no component split is required.
 */
#define params_SRC zipped=0, lddst=0
.macro blend_kernel_SRC
        vmov        q0, q8
        vmov        q1, q9
        vmov        q2, q10
        vmov        q3, q11
.endm

/* DST: the destination is left exactly as it is, so the kernel is empty and
 * asks for no wrapper code at all (nowrap=1).
 */
#define params_DST nowrap=1
.macro blend_kernel_DST
        /* intentionally empty */
.endm

/* SRC_OVER: dst = src + dst * (255 - src.a) / 255, on all four channels.
 *
 * Requires split components (zipped=1) so that q11 holds source alpha only.
 * Scratch: q4-q7 and q12-q15.  d8-d15 (q4-q7) are callee-saved under AAPCS,
 * so the enclosing function is responsible for preserving them.
 */
#define params_SRC_OVER zipped=1
.macro blend_kernel_SRC_OVER
        vmvn        q7, q11                 /* q7 = 255 - src.a */

        /* p = dst * (255 - src.a) as 16-bit lanes.  Each high-half multiply
         * comes first because the low-half result overwrites both d
         * registers of the dst q register it reads from. */
        vmull.u8    q12, d15, d1
        vmull.u8    q0,  d14, d0
        vmull.u8    q13, d15, d3
        vmull.u8    q1,  d14, d2
        vmull.u8    q14, d15, d5
        vmull.u8    q2,  d14, d4
        vmull.u8    q15, d15, d7
        vmull.u8    q3,  d14, d6

        /* t = (p + 128) >> 8, narrowed to bytes in q4-q7. */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d15, q15, #8

        /* p += t */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q15, q15, d15

        /* (p + 128) >> 8 -- together with the step above this approximates
         * the original product divided by 255.  Results land in q0-q3. */
        vrshrn.u16  d0, q0,  #8
        vrshrn.u16  d1, q12, #8
        vrshrn.u16  d2, q1,  #8
        vrshrn.u16  d3, q13, #8
        vrshrn.u16  d4, q2,  #8
        vrshrn.u16  d5, q14, #8
        vrshrn.u16  d6, q3,  #8
        vrshrn.u16  d7, q15, #8

        /* Add the source on top, saturating at 255. */
        vqadd.u8    q0, q0, q8
        vqadd.u8    q1, q1, q9
        vqadd.u8    q2, q2, q10
        vqadd.u8    q3, q3, q11
.endm

/* DST_OVER: dst = dst + src * (255 - dst.a) / 255, on all four channels.
 *
 * Requires split components (zipped=1) so that q3 holds destination alpha
 * only.  The source registers q8-q11 are overwritten with the scaled source.
 * Scratch: q4-q7 and q12-q15.  d8-d15 (q4-q7) are callee-saved under AAPCS,
 * so the enclosing function is responsible for preserving them.
 */
#define params_DST_OVER zipped=1
.macro blend_kernel_DST_OVER
        vmvn        q7, q3                  /* q7 = 255 - dst.a */

        /* p = src * (255 - dst.a) as 16-bit lanes: high halves in q12-q15,
         * low halves in q8-q11.  Each high-half multiply comes first because
         * the low-half result overwrites both d registers of the src q
         * register it reads from. */
        vmull.u8    q12, d15, d17
        vmull.u8    q8,  d14, d16
        vmull.u8    q13, d15, d19
        vmull.u8    q9,  d14, d18
        vmull.u8    q14, d15, d21
        vmull.u8    q10, d14, d20
        vmull.u8    q15, d15, d23
        vmull.u8    q11, d14, d22

        /* t = (p + 128) >> 8, narrowed to bytes in q4-q7.  The low-half terms
         * must come from the products in q8-q11; q0-q3 still hold the packed
         * 8-bit destination pixels and are not products. */
        vrshrn.u16  d8,  q8,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q9,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q10, #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q11, #8
        vrshrn.u16  d15, q15, #8

        /* p += t */
        vaddw.u8    q8,  q8,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q9,  q9,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q10, q10, d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q11, q11, d14
        vaddw.u8    q15, q15, d15

        /* (p + 128) >> 8 -- together with the step above this approximates
         * the original product divided by 255.  Results land in q8-q11. */
        vrshrn.u16  d16, q8,  #8
        vrshrn.u16  d17, q12, #8
        vrshrn.u16  d18, q9,  #8
        vrshrn.u16  d19, q13, #8
        vrshrn.u16  d20, q10, #8
        vrshrn.u16  d21, q14, #8
        vrshrn.u16  d22, q11, #8
        vrshrn.u16  d23, q15, #8

        /* Add the scaled source to the untouched destination, saturating. */
        vqadd.u8    q0, q0, q8
        vqadd.u8    q1, q1, q9
        vqadd.u8    q2, q2, q10
        vqadd.u8    q3, q3, q11
.endm

/* SRC_IN: dst = src * dst.a / 255, on all four channels.
 *
 * Requires split components (zipped=1) so that q3 holds destination alpha
 * only.  Also expanded by blend_kernel_SRC_OUT after it complements q3.
 * Scratch: q4-q7 and q12-q15.  d8-d15 (q4-q7) are callee-saved under AAPCS,
 * so the enclosing function is responsible for preserving them.
 */
#define params_SRC_IN zipped=1
.macro blend_kernel_SRC_IN
        /* p = dst.a * src as 16-bit lanes.  The multiply that writes q3 is
         * last because it overwrites the alpha (d6/d7) that every other
         * multiply reads. */
        vmull.u8    q12, d7, d17
        vmull.u8    q0,  d6, d16
        vmull.u8    q13, d7, d19
        vmull.u8    q1,  d6, d18
        vmull.u8    q14, d7, d21
        vmull.u8    q2,  d6, d20
        vmull.u8    q15, d7, d23
        vmull.u8    q3,  d6, d22

        /* t = (p + 128) >> 8, narrowed to bytes in q4-q7. */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d15, q15, #8

        /* p += t */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q15, q15, d15

        /* (p + 128) >> 8 -- approximates the original product divided by
         * 255.  Results land in q0-q3. */
        vrshrn.u16  d0, q0,  #8
        vrshrn.u16  d1, q12, #8
        vrshrn.u16  d2, q1,  #8
        vrshrn.u16  d3, q13, #8
        vrshrn.u16  d4, q2,  #8
        vrshrn.u16  d5, q14, #8
        vrshrn.u16  d6, q3,  #8
        vrshrn.u16  d7, q15, #8
.endm

/* DST_IN: dst = dst * src.a / 255, on all four channels.
 *
 * Requires split components (zipped=1) so that q11 holds source alpha only.
 * Also expanded by blend_kernel_DST_OUT after it complements q11.
 * Scratch: q4-q7 and q12-q15.  d8-d15 (q4-q7) are callee-saved under AAPCS,
 * so the enclosing function is responsible for preserving them.
 */
#define params_DST_IN zipped=1
.macro blend_kernel_DST_IN
        /* p = dst * src.a as 16-bit lanes.  Each high-half multiply comes
         * first because the low-half result overwrites both d registers of
         * the dst q register it reads from. */
        vmull.u8    q12, d1, d23
        vmull.u8    q0,  d0, d22
        vmull.u8    q13, d3, d23
        vmull.u8    q1,  d2, d22
        vmull.u8    q14, d5, d23
        vmull.u8    q2,  d4, d22
        vmull.u8    q15, d7, d23
        vmull.u8    q3,  d6, d22

        /* t = (p + 128) >> 8, narrowed to bytes in q4-q7. */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d15, q15, #8

        /* p += t */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q15, q15, d15

        /* (p + 128) >> 8 -- approximates the original product divided by
         * 255.  Results land in q0-q3. */
        vrshrn.u16  d0, q0,  #8
        vrshrn.u16  d1, q12, #8
        vrshrn.u16  d2, q1,  #8
        vrshrn.u16  d3, q13, #8
        vrshrn.u16  d4, q2,  #8
        vrshrn.u16  d5, q14, #8
        vrshrn.u16  d6, q3,  #8
        vrshrn.u16  d7, q15, #8
.endm

/* SRC_OUT: dst = src * (255 - dst.a) / 255.  Implemented by complementing the
 * destination alpha in q3 and then expanding the SRC_IN kernel.
 */
#define params_SRC_OUT zipped=1
.macro blend_kernel_SRC_OUT
        vmvn        q3, q3                  /* q3 = 255 - dst.a */
        blend_kernel_SRC_IN
.endm


/* DST_OUT: dst = dst * (255 - src.a) / 255.  Implemented by complementing the
 * source alpha in q11 and then expanding the DST_IN kernel.
 */
#define params_DST_OUT zipped=1
.macro blend_kernel_DST_OUT
        vmvn        q11, q11                /* q11 = 255 - src.a */
        blend_kernel_DST_IN
.endm

/* SRC_ATOP: dst.rgb = (dst.rgb * (255 - src.a) + src.rgb * dst.a) / 255.
 * q3 is never written, so the result keeps the destination alpha.
 *
 * Requires split components (zipped=1).  The source registers q8-q11 are
 * overwritten.  Scratch: q4-q6 and q12-q14.  d8-d13 (q4-q6) are callee-saved
 * under AAPCS, so the enclosing function is responsible for preserving them.
 */
#define params_SRC_ATOP zipped=1
.macro blend_kernel_SRC_ATOP
        vmvn        q11, q11                /* q11 = 255 - src.a */

        /* a = dst.rgb * (255 - src.a): high halves q12-q14, low halves q0-q2.
         * High half first, since the low-half result overwrites its input. */
        vmull.u8    q12, d23, d1
        vmull.u8    q0,  d22, d0
        vmull.u8    q13, d23, d3
        vmull.u8    q1,  d22, d2
        vmull.u8    q14, d23, d5
        vmull.u8    q2,  d22, d4

        /* b = src.rgb * dst.a: high halves q4-q6, low halves q8-q10. */
        vmull.u8    q4,  d7, d17
        vmull.u8    q8,  d6, d16
        vmull.u8    q5,  d7, d19
        vmull.u8    q9,  d6, d18
        vmull.u8    q6,  d7, d21
        vmull.u8    q10, d6, d20

        /* s = a + b, saturating at 16 bits. */
        vqadd.u16   q12, q12, q4
        vqadd.u16   q0,  q0,  q8
        vqadd.u16   q13, q13, q5
        vqadd.u16   q1,  q1,  q9
        vqadd.u16   q14, q14, q6
        vqadd.u16   q2,  q2,  q10

        /* t = (s + 128) >> 8, kept at 16 bits. */
        vrshr.u16   q8,  q0,  #8
        vrshr.u16   q4,  q12, #8
        vrshr.u16   q9,  q1,  #8
        vrshr.u16   q5,  q13, #8
        vrshr.u16   q10, q2,  #8
        vrshr.u16   q6,  q14, #8

        /* s += t, saturating. */
        vqadd.u16   q0,  q0,  q8
        vqadd.u16   q12, q12, q4
        vqadd.u16   q1,  q1,  q9
        vqadd.u16   q13, q13, q5
        vqadd.u16   q2,  q2,  q10
        vqadd.u16   q14, q14, q6

        /* (s + 128) >> 8 with a saturating narrow to bytes -- approximates
         * the original sum divided by 255.  Results land in q0-q2. */
        vqrshrn.u16 d0, q0,  #8
        vqrshrn.u16 d1, q12, #8
        vqrshrn.u16 d2, q1,  #8
        vqrshrn.u16 d3, q13, #8
        vqrshrn.u16 d4, q2,  #8
        vqrshrn.u16 d5, q14, #8
.endm

/* DST_ATOP: dst.rgb = (dst.rgb * src.a + src.rgb * (255 - dst.a)) / 255,
 *           dst.a   = src.a.
 *
 * Requires split components (zipped=1).  The source colour registers q8-q10
 * are overwritten; q11 (source alpha) is only read.  Scratch: q4-q6 and
 * q12-q14.  d8-d13 (q4-q6) are callee-saved under AAPCS, so the enclosing
 * function is responsible for preserving them.
 */
#define params_DST_ATOP zipped=1
.macro blend_kernel_DST_ATOP
        vmvn        q3, q3                  /* q3 = 255 - dst.a */

        /* a = dst.rgb * src.a: high halves q12-q14, low halves q0-q2.
         * High half first, since the low-half result overwrites its input. */
        vmull.u8    q12, d23, d1
        vmull.u8    q0,  d22, d0
        vmull.u8    q13, d23, d3
        vmull.u8    q1,  d22, d2
        vmull.u8    q14, d23, d5
        vmull.u8    q2,  d22, d4

        /* b = src.rgb * (255 - dst.a): high halves q4-q6, low halves q8-q10. */
        vmull.u8    q4,  d7, d17
        vmull.u8    q8,  d6, d16
        vmull.u8    q5,  d7, d19
        vmull.u8    q9,  d6, d18
        vmull.u8    q6,  d7, d21
        vmull.u8    q10, d6, d20

        /* s = a + b, saturating at 16 bits. */
        vqadd.u16   q12, q12, q4
        vqadd.u16   q0,  q0,  q8
        vqadd.u16   q13, q13, q5
        vqadd.u16   q1,  q1,  q9
        vqadd.u16   q14, q14, q6
        vqadd.u16   q2,  q2,  q10

        /* t = (s + 128) >> 8, kept at 16 bits. */
        vrshr.u16   q8,  q0,  #8
        vrshr.u16   q4,  q12, #8
        vrshr.u16   q9,  q1,  #8
        vrshr.u16   q5,  q13, #8
        vrshr.u16   q10, q2,  #8
        vrshr.u16   q6,  q14, #8

        /* s += t, saturating. */
        vqadd.u16   q0,  q0,  q8
        vqadd.u16   q12, q12, q4
        vqadd.u16   q1,  q1,  q9
        vqadd.u16   q13, q13, q5
        vqadd.u16   q2,  q2,  q10
        vqadd.u16   q14, q14, q6

        /* (s + 128) >> 8 with a saturating narrow to bytes -- approximates
         * the original sum divided by 255.  Results land in q0-q2. */
        vqrshrn.u16 d0, q0,  #8
        vqrshrn.u16 d1, q12, #8
        vqrshrn.u16 d2, q1,  #8
        vqrshrn.u16 d3, q13, #8
        vqrshrn.u16 d4, q2,  #8
        vqrshrn.u16 d5, q14, #8

        /* The result takes the source alpha.  Undoing the complement of q3
         * here would leave the destination alpha in the output instead. */
        vmov        q3, q11
.endm

/* MULTIPLY: dst = dst * src / 255, byte by byte.
 *
 * Every byte is paired with the byte at the same position in the other
 * buffer, so the components do not need to be separated (zipped=0).
 * Scratch: q4-q7 and q12-q15.  d8-d15 (q4-q7) are callee-saved under AAPCS,
 * so the enclosing function is responsible for preserving them.
 */
#define params_MULTIPLY zipped=0
.macro blend_kernel_MULTIPLY
        /* p = dst * src as 16-bit lanes.  Each high-half multiply comes first
         * because the low-half result overwrites both d registers of the dst
         * q register it reads from. */
        vmull.u8    q12, d1, d17
        vmull.u8    q0,  d0, d16
        vmull.u8    q13, d3, d19
        vmull.u8    q1,  d2, d18
        vmull.u8    q14, d5, d21
        vmull.u8    q2,  d4, d20
        vmull.u8    q15, d7, d23
        vmull.u8    q3,  d6, d22

        /* t = (p + 128) >> 8, narrowed to bytes in q4-q7. */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d15, q15, #8

        /* p += t */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q15, q15, d15

        /* (p + 128) >> 8 -- approximates the original product divided by
         * 255.  Results land in q0-q3. */
        vrshrn.u16  d0, q0,  #8
        vrshrn.u16  d1, q12, #8
        vrshrn.u16  d2, q1,  #8
        vrshrn.u16  d3, q13, #8
        vrshrn.u16  d4, q2,  #8
        vrshrn.u16  d5, q14, #8
        vrshrn.u16  d6, q3,  #8
        vrshrn.u16  d7, q15, #8
.endm

/* ADD: result = dst + src, saturating at 255, on every byte independently.
 *
 * The operation is identical for all four channels, so the wrapper is asked
 * (zipped=0) to skip de-interleaving and hand the bytes over in memory order.
 *
 *   q0-q3   destination bytes on entry, result on exit
 *   q8-q11  source bytes (not modified)
 */
#define params_ADD zipped=0
.macro blend_kernel_ADD
        vqadd.u8 q0, q0, q8
        vqadd.u8 q1, q1, q9
        vqadd.u8 q2, q2, q10
        vqadd.u8 q3, q3, q11
.endm
4055d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* SUBTRACT: result = dst - src, saturating at 0, on every byte independently.
 *
 * Channel position is irrelevant to the arithmetic, so the wrapper is asked
 * (zipped=0) to pass the bytes through in memory order.
 *
 *   q0-q3   destination bytes on entry, result on exit
 *   q8-q11  source bytes (not modified)
 */
#define params_SUBTRACT zipped=0
.macro blend_kernel_SUBTRACT
        vqsub.u8 q0, q0, q8
        vqsub.u8 q1, q1, q9
        vqsub.u8 q2, q2, q10
        vqsub.u8 q3, q3, q11
.endm
4135d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* DIFFERENCE: result = |dst - src| on every byte independently (unsigned
 * absolute difference, so the result never wraps).
 *
 * Channel position is irrelevant to the arithmetic, so the wrapper is asked
 * (zipped=0) to pass the bytes through in memory order.
 *
 *   q0-q3   destination bytes on entry, result on exit
 *   q8-q11  source bytes (not modified)
 */
#define params_DIFFERENCE zipped=0
.macro blend_kernel_DIFFERENCE
        vabd.u8 q0, q0, q8
        vabd.u8 q1, q1, q9
        vabd.u8 q2, q2, q10
        vabd.u8 q3, q3, q11
.endm
4215d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* XOR: result = dst ^ src, bitwise.
 *
 * A pure bitwise operation has no notion of channels, so the wrapper is asked
 * (zipped=0) to pass the bytes through in memory order.
 *
 *   q0-q3   destination bytes on entry, result on exit
 *   q8-q11  source bytes (not modified)
 */
#define params_XOR zipped=0
.macro blend_kernel_XOR
        veor    q0, q0, q8
        veor    q1, q1, q9
        veor    q2, q2, q10
        veor    q3, q3, q11
.endm
4295d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
4305d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* wrap_line: the common body of every blend_line_XX function.
 *
 * Loads destination and source data, expands the requested kernel macro over
 * it, and stores the result back over the destination.  Data is processed 64
 * bytes per iteration, followed by one pass over any remainder of 1..63
 * bytes.  Sections of code that a particular kernel does not need are dropped
 * or replaced with simpler operations at assembly time.
 *
 * Macro parameters:
 *   kernel   name of the blend_kernel_XX macro to expand
 *   nowrap   1: emit only the kernel followed by the return sequence (no
 *               loads, stores or looping)
 *   zipped   1: de-interleave on load / re-interleave on store, so that each
 *               of q0..q3 (and q8..q11) holds one byte position of sixteen
 *               4-byte pixels
 *            0: keep the bytes in memory order
 *   lddst    1: load the existing destination data into q0-q3
 *   ldsrc    1: load the source data into q8-q11
 *   pld      1: issue a preload 192 bytes ahead on each stream being loaded
 *
 * Registers in the emitted code:
 *   r0       destination pointer
 *   r1       source pointer
 *   r2       number of bytes to process
 *   q0-q3    destination data in, result out
 *   q8-q11   source data
 *   d8-d15   saved and restored here, leaving q4-q7 free as kernel scratch
 *
 * The emitted code returns to the caller with r0 = 0.
 */
.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
.if \nowrap
        \kernel
.else
        vpush   {d8-d15}
        /* Bias the count by one full chunk and enter at the loop test, so a
         * line shorter than 64 bytes goes straight to the tail code.
         */
        subs    r2, #64
        b       2f
        .align 4
1:
  .if \lddst
    .if \zipped
        vld4.8  {d0,d2,d4,d6}, [r0]!
        vld4.8  {d1,d3,d5,d7}, [r0]!
    .else
        vld1.8  {d0-d3}, [r0]!
        vld1.8  {d4-d7}, [r0]!
    .endif
        /* Rewind: the result is stored over the data just loaded. */
        sub     r0, #64
  .endif
  .if \ldsrc
    .if \zipped
        vld4.8  {d16,d18,d20,d22}, [r1]!
        vld4.8  {d17,d19,d21,d23}, [r1]!
    .else
        vld1.8  {d16-d19}, [r1]!
        vld1.8  {d20-d23}, [r1]!
    .endif
  .endif
  .if \pld
    .if \lddst ; pld [r0, #192] ; .endif
    .if \ldsrc ; pld [r1, #192] ; .endif
  .endif

        \kernel

        /* The count is updated ahead of the stores; the stores leave the
         * flags alone, so the branch at 2: still sees the result of this subs.
         */
        subs    r2, #64
  .if \zipped
        vst4.8  {d0,d2,d4,d6}, [r0]!
        vst4.8  {d1,d3,d5,d7}, [r0]!
  .else
        vst1.8  {d0-d3}, [r0]!
        vst1.8  {d4-d7}, [r0]!
  .endif

2:      bge     1b
        /* Undo the bias; r2 is now the number of leftover bytes.  Nothing
         * left means nothing more to do.
         */
        adds    r2, #64
        beq     2f

        /* To handle the tail portion of the data (something less than 64
         * bytes) load small power-of-two chunks into working registers.  It
         * doesn't matter where they end up in the register; the same process
         * will store them back out using the same positions and the operations
         * don't require data to interact with its neighbours.
         *
         * The registers are cleared first so that positions not covered by
         * the tail hold zeros.
         */
        vmov.i8 q0, #0
        vmov.i8 q1, #0
        vmov.i8 q2, #0
        vmov.i8 q3, #0

        vmov.i8 q8, #0
        vmov.i8 q9, #0
        vmov.i8 q10, #0
        vmov.i8 q11, #0

        /* One chunk per set bit of the remaining count, largest first.  The
         * 4-, 2- and 1-byte chunks all name lane [1] of d0/d16, but at their
         * respective element sizes that is bytes 4-7, bytes 2-3 and byte 1,
         * so they never overlap.
         */
        tst     r2, #32
        beq     1f
  .if \lddst ; vld1.64 {d4-d7}, [r0]!   ; .endif
  .if \ldsrc ; vld1.64 {d20-d23}, [r1]! ; .endif
1:      tst     r2, #16
        beq     1f
  .if \lddst ; vld1.64 {d2-d3}, [r0]!   ; .endif
  .if \ldsrc ; vld1.64 {d18-d19}, [r1]! ; .endif
1:      tst     r2, #8
        beq     1f
  .if \lddst ; vld1.64 {d1}, [r0]!      ; .endif
  .if \ldsrc ; vld1.64 {d17}, [r1]!     ; .endif
1:      tst     r2, #4
        beq     1f
  .if \lddst ; vld1.32 {d0[1]}, [r0]!   ; .endif
  .if \ldsrc ; vld1.32 {d16[1]}, [r1]!  ; .endif
1:      tst     r2, #2
        beq     1f
  .if \lddst ; vld1.16 {d0[1]}, [r0]!   ; .endif
  .if \ldsrc ; vld1.16 {d16[1]}, [r1]!  ; .endif
1:      tst     r2, #1
        beq     1f
  .if \lddst ; vld1.8  {d0[1]}, [r0]!   ; .endif
  .if \ldsrc ; vld1.8  {d16[1]}, [r1]!  ; .endif
1:
        /* The loads above advanced r0 by exactly r2 bytes; step back so the
         * stores below land on the same addresses.
         */
  .if \lddst ; sub     r0, r2           ; .endif

  .if \zipped
        /* One small impediment in the process above is that some of the load
         * operations can't perform byte-wise structure deinterleaving at the
         * same time as loading only part of a register.  So the data is loaded
         * linearly and unpacked manually at this point.
         */
        vuzp.8  q0, q1
        vuzp.8  q2, q3
        vuzp.8  q0, q2
        vuzp.8  q1, q3

        vuzp.8  q8, q9
        vuzp.8  q10, q11
        vuzp.8  q8, q10
        vuzp.8  q9, q11

        \kernel

        /* Re-interleave the result: the inverse of the unpacking above. */
        vzip.8  q0, q2
        vzip.8  q1, q3
        vzip.8  q0, q1
        vzip.8  q2, q3
  .else
        \kernel
  .endif

        /* Write the result back using the same chunk positions as the loads. */
        tst     r2, #32
        beq     1f
        vst1.64 {d4-d7}, [r0]!
1:      tst     r2, #16
        beq     1f
        vst1.64 {d2-d3}, [r0]!
1:      tst     r2, #8
        beq     1f
        vst1.64 {d1}, [r0]!
1:      tst     r2, #4
        beq     1f
        vst1.32 {d0[1]}, [r0]!
1:      tst     r2, #2
        beq     1f
        vst1.16 {d0[1]}, [r0]!
1:      tst     r2, #1
        beq     2f
        vst1.8  {d0[1]}, [r0]!
2:      vpop    {d8-d15}
.endif
        /* Report success to the caller. */
        mov     r0, #0
        bx      lr
.endm
5765d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
5775d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/* Produce the list of blend_line_XX() functions, one per BLEND_LIST entry.
 * Each function is the wrap_line macro expanded around the matching
 * blend_kernel_XX operation macro, with params_XX supplying the optional
 * wrap_line arguments that remove work the operation does not need.
 */
#define BLEND_X(d, n) ENTRY(blend_line_##n) ; wrap_line blend_kernel_##n, params_##n ; END(blend_line_##n) ;
    BLEND_LIST(BLEND_X)
#undef BLEND_X
5855d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
5865d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie
/*  int rsdIntrinsicBlend_K(
 *          uchar4 *out,        // r0
 *          uchar4 const *in,   // r1
 *          int slot,           // r2
 *          size_t xstart,      // r3
 *          size_t xend);       // [sp]
 *
 * Looks up the blend_line_XX function for `slot` and tail-calls it with
 *   r0 = out + xstart   (byte address: xstart * 4)
 *   r1 = in  + xstart   (byte address: xstart * 4)
 *   r2 = (xend - xstart) * 4, the number of bytes to process
 *
 * Returns 0 (from the blend_line_XX function) on success, or -1 when `slot`
 * is outside the table or names a mode with no implementation.
 */
ENTRY(rsdIntrinsicBlend_K)
    adr     ip, blend_functions
    /* Unsigned range check against the number of table entries, so a
     * negative slot is rejected along with one that is too large.
     */
    cmp     r2, #(blend_functions_end - blend_functions) >> 2
    ldrlo   ip, [ip, r2, LSL #2]
    movhs   ip, #0
    /* Convert the pixel range into byte pointers and a byte count. */
    ldr     r2, [sp]
    add     r0, r3, LSL #2
    add     r1, r3, LSL #2
    sub     r2, r3
    mov     r2, r2, LSL #2
    /* A zero entry means there is no function for this slot.  Otherwise the
     * entry is an offset from label 1 below.  In ARM state pc reads as the
     * current instruction plus 8, which at the addne is the address of that
     * label, so the addne/bxne pair must stay immediately in front of it.
     */
    cmp     ip, #0
    addne   ip, ip, pc
    bxne    ip
1:  mov     r0, #-1
    bx      lr

/* Table indexed by slot number.  Each populated entry holds the offset of
 * blend_line_XX from label 1 above; slot numbers missing from BLEND_LIST are
 * padded with zero words.
 */
blend_functions:
.set off,0
#define BLEND_X(d, n) .rept d-off ; .word 0 ; .endr ; .word blend_line_##n-1b ; .set off, d+1 ;
        BLEND_LIST(BLEND_X)
#undef BLEND_X
blend_functions_end:

END(rsdIntrinsicBlend_K)
618