15d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie/* 25d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * Copyright (C) 2013-2014 The Android Open Source Project 35d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * 45d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * Licensed under the Apache License, Version 2.0 (the "License"); 55d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * you may not use this file except in compliance with the License. 65d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * You may obtain a copy of the License at 75d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * 85d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * http://www.apache.org/licenses/LICENSE-2.0 95d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * 105d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * Unless required by applicable law or agreed to in writing, software 115d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * distributed under the License is distributed on an "AS IS" BASIS, 125d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 135d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * See the License for the specific language governing permissions and 145d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * limitations under the License. 
155d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie */ 165d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 175d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart 185d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define END(f) .fnend; .size f, .-f; 195d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 205d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define BLEND_LIST(X) \ 215d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(0, CLEAR) \ 225d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(1, SRC) \ 235d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(2, DST) \ 245d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(3, SRC_OVER) \ 255d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(4, DST_OVER) \ 265d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(5, SRC_IN) \ 275d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(6, DST_IN) \ 285d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(7, SRC_OUT) \ 295d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(8, DST_OUT) \ 305d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(9, SRC_ATOP) \ 315d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(10, DST_ATOP) \ 325d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(11, XOR) \ 335d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(14, MULTIPLY) \ 345d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(21, DIFFERENCE) \ 355d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(34, ADD) \ 365d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie X(35, SUBTRACT) 375d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 385d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.eabi_attribute 25,1 @Tag_ABI_align8_preserved 395d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.arm 405d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 415d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie/* For every blend operation supported, define a macro with just the arithmetic 
425d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * component. The rest can be handled later on. 435d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * 445d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * At entry q0-q3 contain the RGBA data from the destination buffer, and q8-q11 455d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * contain the data from the source buffer. Both have already been split out 465d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * into one colour component per register (if necessary). q3 and q11 contain 475d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * the alpha components. 485d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * 495d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * At the same time as defining the assembly macro, define a corresponding 505d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * preprocessor macro indicating any other requirements. 515d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * zipped=0 -- The macro does not require the RGBA components to be 525d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * separated. 535d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * lddst=0 -- The macro does not require data from the destination buffer. 545d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * ldsrc=0 -- The macro does not require data from the source buffer. 555d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * nowrap=1 -- The macro requires no wrapper at all, and should simply be 565d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie * inserted without any surrounding load/store or loop code. 
575d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie */ 585d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 595d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_CLEAR zipped=0, lddst=0, ldsrc=0 605d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_CLEAR 615d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmov.i8 q0, #0 625d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmov.i8 q1, #0 635d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmov.i8 q2, #0 645d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmov.i8 q3, #0 655d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 665d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 675d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_SRC zipped=0, lddst=0 685d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_SRC 695d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmov q0, q8 705d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmov q1, q9 715d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmov q2, q10 725d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmov q3, q11 735d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 745d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 755d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_DST nowrap=1 765d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_DST 775d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie /* nop */ 785d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 795d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 805d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_SRC_OVER zipped=1 815d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_SRC_OVER 825d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmvn q7, q11 835d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 845d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q12, d15, d1 855d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q0, d14, 
d0 865d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q13, d15, d3 875d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q1, d14, d2 885d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q14, d15, d5 895d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q2, d14, d4 905d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q15, d15, d7 915d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q3, d14, d6 925d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 935d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d8, q0, #8 945d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d9, q12, #8 955d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d10, q1, #8 965d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d11, q13, #8 975d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d12, q2, #8 985d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d13, q14, #8 995d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d14, q3, #8 1005d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d15, q15, #8 1015d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1025d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q0, d8 1035d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q12, d9 1045d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q1, d10 1055d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q13, d11 1065d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q2, d12 1075d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q14, d13 1085d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q3, d14 1095d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q15, d15 1105d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1115d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d0, q0, #8 1125d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d1, q12, #8 
1135d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d2, q1, #8 1145d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d3, q13, #8 1155d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d4, q2, #8 1165d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d5, q14, #8 1175d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d6, q3, #8 1185d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d7, q15, #8 1195d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1205d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u8 q0, q8 1215d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u8 q1, q9 1225d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u8 q2, q10 1235d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u8 q3, q11 1245d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 1255d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1265d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_DST_OVER zipped=1 1275d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_DST_OVER 1285d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmvn q7, q3 1295d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1305d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q12, d15, d17 1315d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q8, d14, d16 1325d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q13, d15, d19 1335d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q9, d14, d18 1345d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q14, d15, d21 1355d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q10, d14, d20 1365d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q15, d15, d23 1375d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q11, d14, d22 1385d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1395d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d8, q0, #8 
1405d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d9, q12, #8 1415d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d10, q1, #8 1425d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d11, q13, #8 1435d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d12, q2, #8 1445d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d13, q14, #8 1455d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d14, q3, #8 1465d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d15, q15, #8 1475d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1485d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q8, d8 1495d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q12, d9 1505d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q9, d10 1515d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q13, d11 1525d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q10, d12 1535d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q14, d13 1545d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q11, d14 1555d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q15, d15 1565d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1575d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d16, q8, #8 1585d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d17, q12, #8 1595d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d18, q9, #8 1605d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d19, q13, #8 1615d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d20, q10, #8 1625d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d21, q14, #8 1635d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d22, q11, #8 1645d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d23, q15, #8 1655d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1665d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u8 q0, q8 
1675d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u8 q1, q9 1685d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u8 q2, q10 1695d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u8 q3, q11 1705d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 1715d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1725d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_SRC_IN zipped=1 1735d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_SRC_IN 1745d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q12, d7, d17 1755d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q0, d6, d16 1765d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q13, d7, d19 1775d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q1, d6, d18 1785d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q14, d7, d21 1795d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q2, d6, d20 1805d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q15, d7, d23 1815d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q3, d6, d22 1825d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1835d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d8, q0, #8 1845d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d9, q12, #8 1855d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d10, q1, #8 1865d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d11, q13, #8 1875d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d12, q2, #8 1885d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d13, q14, #8 1895d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d14, q3, #8 1905d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d15, q15, #8 1915d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 1925d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q0, d8 1935d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q12, d9 
1945d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q1, d10 1955d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q13, d11 1965d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q2, d12 1975d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q14, d13 1985d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q3, d14 1995d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q15, d15 2005d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2015d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d0, q0, #8 2025d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d1, q12, #8 2035d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d2, q1, #8 2045d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d3, q13, #8 2055d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d4, q2, #8 2065d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d5, q14, #8 2075d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d6, q3, #8 2085d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d7, q15, #8 2095d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 2105d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2115d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_DST_IN zipped=1 2125d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_DST_IN 2135d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q12, d1, d23 2145d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q0, d0, d22 2155d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q13, d3, d23 2165d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q1, d2, d22 2175d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q14, d5, d23 2185d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q2, d4, d22 2195d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q15, d7, d23 2205d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q3, d6, 
d22 2215d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2225d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d8, q0, #8 2235d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d9, q12, #8 2245d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d10, q1, #8 2255d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d11, q13, #8 2265d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d12, q2, #8 2275d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d13, q14, #8 2285d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d14, q3, #8 2295d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d15, q15, #8 2305d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2315d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q0, d8 2325d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q12, d9 2335d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q1, d10 2345d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q13, d11 2355d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q2, d12 2365d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q14, d13 2375d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q3, d14 2385d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vaddw.u8 q15, d15 2395d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2405d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d0, q0, #8 2415d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d1, q12, #8 2425d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d2, q1, #8 2435d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d3, q13, #8 2445d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d4, q2, #8 2455d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d5, q14, #8 2465d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d6, q3, #8 2475d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshrn.u16 d7, q15, #8 
2485d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 2495d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2505d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_SRC_OUT zipped=1 2515d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_SRC_OUT 2525d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmvn q3, q3 2535d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie blend_kernel_SRC_IN 2545d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 2555d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2565d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2575d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_DST_OUT zipped=1 2585d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_DST_OUT 2595d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmvn q11, q11 2605d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie blend_kernel_DST_IN 2615d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 2625d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2635d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_SRC_ATOP zipped=1 2645d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_SRC_ATOP 2655d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmvn q11, q11 2665d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2675d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q12, d23, d1 2685d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q0, d22, d0 2695d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q13, d23, d3 2705d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q1, d22, d2 2715d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q14, d23, d5 2725d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q2, d22, d4 2735d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2745d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q4, d7, d17 2755d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q8, d6, d16 
2765d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q5, d7, d19 2775d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q9, d6, d18 2785d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q6, d7, d21 2795d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q10, d6, d20 2805d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2815d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q12, q4 2825d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q0, q8 2835d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q13, q5 2845d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q1, q9 2855d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q14, q6 2865d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q2, q10 2875d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2885d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q8, q0, #8 2895d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q4, q12, #8 2905d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q9, q1, #8 2915d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q5, q13, #8 2925d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q10, q2, #8 2935d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q6, q14, #8 2945d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 2955d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q0, q8 2965d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q12, q4 2975d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q1, q9 2985d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q13, q5 2995d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q2, q10 3005d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q14, q6 3015d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3025d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d0, q0, #8 3035d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 
vqrshrn.u16 d1, q12, #8 3045d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d2, q1, #8 3055d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d3, q13, #8 3065d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d4, q2, #8 3075d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d5, q14, #8 3085d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 3095d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3105d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie#define params_DST_ATOP zipped=1 3115d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.macro blend_kernel_DST_ATOP 3125d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmvn q3, q3 3135d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3145d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q12, d23, d1 3155d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q0, d22, d0 3165d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q13, d23, d3 3175d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q1, d22, d2 3185d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q14, d23, d5 3195d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q2, d22, d4 3205d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3215d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q4, d7, d17 3225d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q8, d6, d16 3235d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q5, d7, d19 3245d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q9, d6, d18 3255d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q6, d7, d21 3265d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmull.u8 q10, d6, d20 3275d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3285d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q12, q4 3295d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q0, q8 3305d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q13, q5 
3315d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q1, q9 3325d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q14, q6 3335d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q2, q10 3345d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3355d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q8, q0, #8 3365d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q4, q12, #8 3375d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q9, q1, #8 3385d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q5, q13, #8 3395d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q10, q2, #8 3405d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vrshr.u16 q6, q14, #8 3415d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3425d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q0, q8 3435d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q12, q4 3445d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q1, q9 3455d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q13, q5 3465d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q2, q10 3475d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqadd.u16 q14, q6 3485d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3495d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d0, q0, #8 3505d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d1, q12, #8 3515d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d2, q1, #8 3525d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d3, q13, #8 3535d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d4, q2, #8 3545d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vqrshrn.u16 d5, q14, #8 3555d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 3565d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie vmvn q3, q3 3575d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie.endm 3585d06919bc8019322180ea34768a7a4137fa64d11Simon Hosie 
/* MULTIPLY: result = dst * src / 255 for every byte.  The data is processed
 * unzipped, so all four components (alpha included) get the same treatment.
 * The 16-bit product x is scaled with t = (x + 128) >> 8 followed by
 * (x + t + 128) >> 8.  Clobbers q4-q7 and q12-q15.
 */
#define params_MULTIPLY zipped=0
.macro blend_kernel_MULTIPLY
        /* High-half products go to scratch first: the low-half products
         * overwrite the register pairs that hold the high-half inputs.
         */
        vmull.u8    q12, d1, d17
        vmull.u8    q13, d3, d19
        vmull.u8    q14, d5, d21
        vmull.u8    q15, d7, d23
        vmull.u8    q0,  d0, d16
        vmull.u8    q1,  d2, d18
        vmull.u8    q2,  d4, d20
        vmull.u8    q3,  d6, d22

        /* t = (x + 128) >> 8: low halves in even d-regs, high halves in odd */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d15, q15, #8

        /* x + t */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q12, q12, d9
        vaddw.u8    q13, q13, d11
        vaddw.u8    q14, q14, d13
        vaddw.u8    q15, q15, d15

        /* (x + t + 128) >> 8, narrowed back into q0-q3 */
        vrshrn.u16  d0,  q0,  #8
        vrshrn.u16  d2,  q1,  #8
        vrshrn.u16  d4,  q2,  #8
        vrshrn.u16  d6,  q3,  #8
        vrshrn.u16  d1,  q12, #8
        vrshrn.u16  d3,  q13, #8
        vrshrn.u16  d5,  q14, #8
        vrshrn.u16  d7,  q15, #8
.endm

/* ADD: result = dst + src per byte, saturating at 255. */
#define params_ADD zipped=0
.macro blend_kernel_ADD
        vqadd.u8    q0, q8
        vqadd.u8    q1, q9
        vqadd.u8    q2, q10
        vqadd.u8    q3, q11
.endm

/* SUBTRACT: result = dst - src per byte, saturating at 0. */
#define params_SUBTRACT zipped=0
.macro blend_kernel_SUBTRACT
        vqsub.u8    q0, q8
        vqsub.u8    q1, q9
        vqsub.u8    q2, q10
        vqsub.u8    q3, q11
.endm

/* DIFFERENCE: result = |dst - src| per byte. */
#define params_DIFFERENCE zipped=0
.macro blend_kernel_DIFFERENCE
        vabd.u8     q0, q8
        vabd.u8     q1, q9
        vabd.u8     q2, q10
        vabd.u8     q3, q11
.endm

/* XOR: result = dst ^ src, a bitwise exclusive-or of the raw bytes.
 * NOTE(review): this is not the Porter-Duff XOR compositing operator --
 * confirm this is what callers expect.
 */
#define params_XOR zipped=0
.macro blend_kernel_XOR
        veor        q0, q8
        veor        q1, q9
        veor        q2, q10
        veor        q3, q11
.endm


/* Define the wrapper code which will load and store the data, iterate the
 * correct number of times, and safely handle the remainder at the end of the
 * loop.  Various sections of assembly code are dropped or substituted for
 * simpler operations if they're not needed.
*/

/* wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
 *
 * Emits the body of one blend_line_XX function around the arithmetic macro
 * named by kernel.
 *
 * Register contract of the emitted code:
 *      r0      destination pointer; read when lddst is set, always written
 *      r1      source pointer; read when ldsrc is set
 *      r2      number of bytes to process (signed)
 *      q0-q3   destination bytes handed to and returned by the kernel
 *      q8-q11  source bytes handed to the kernel
 *      d8-d15  saved on entry and restored on exit with vpush/vpop
 * The emitted code finishes by returning 0 in r0.
 *
 * Macro parameters:
 *      kernel  macro holding only the arithmetic; expanded once for the
 *              64-byte loop body and once for the tail
 *      nowrap  non-zero: expand the kernel alone, with no loads, stores or
 *              loop, and return 0
 *      zipped  non-zero: use vld4.8/vst4.8 so that q0..q3 (and q8..q11) each
 *              hold one byte position of every 4-byte element; zero: use
 *              linear vld1.8/vst1.8
 *      lddst   non-zero: load the destination before running the kernel
 *      ldsrc   non-zero: load the source before running the kernel
 *      pld     non-zero: issue pld hints 192 bytes ahead for each stream
 *              that is being loaded
 */
.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
.if \nowrap
        \kernel
.else
        vpush       {d8-d15}
        subs        r2, #64             /* r2 = bytes left after this block */
        b           2f                  /* enter at the loop test */
        .align 4
1:
  .if \lddst
    .if \zipped
        vld4.8      {d0,d2,d4,d6}, [r0]!
        vld4.8      {d1,d3,d5,d7}, [r0]!
    .else
        vld1.8      {d0-d3}, [r0]!
        vld1.8      {d4-d7}, [r0]!
    .endif
        sub         r0, #64             /* rewind: the store below reuses r0 */
  .endif
  .if \ldsrc
    .if \zipped
        vld4.8      {d16,d18,d20,d22}, [r1]!
        vld4.8      {d17,d19,d21,d23}, [r1]!
    .else
        vld1.8      {d16-d19}, [r1]!
        vld1.8      {d20-d23}, [r1]!
    .endif
  .endif
  .if \pld
    .if \lddst
        pld         [r0, #192]
    .endif
    .if \ldsrc
        pld         [r1, #192]
    .endif
  .endif

        \kernel

        /* The NEON stores leave the flags alone, so this subs feeds the bge
         * at the loop test below.
         */
        subs        r2, #64
  .if \zipped
        vst4.8      {d0,d2,d4,d6}, [r0]!
        vst4.8      {d1,d3,d5,d7}, [r0]!
  .else
        vst1.8      {d0-d3}, [r0]!
        vst1.8      {d4-d7}, [r0]!
  .endif

2:      bge         1b
        adds        r2, #64             /* r2 = remaining bytes, 0..63 */
        /* Leave when nothing remains.  A signed "less or equal" is used
         * rather than "equal" so that a negative byte count also leaves here;
         * otherwise its set bits would be taken for tail chunks and drive
         * loads and stores outside the buffers.
         */
        ble         2f

        /* Tail of fewer than 64 bytes: each set bit of r2 selects one
         * power-of-two sized chunk, and every chunk size owns a distinct
         * slice of q0-q3 (and of q8-q11 for the source).  Which slice is
         * irrelevant because the store sequence further down mirrors this
         * one and the kernels never mix neighbouring elements.  The registers
         * are cleared first so that unused slices hold zeros.
         */
        vmov.i8     q0, #0
        vmov.i8     q1, #0
        vmov.i8     q2, #0
        vmov.i8     q3, #0

        vmov.i8     q8, #0
        vmov.i8     q9, #0
        vmov.i8     q10, #0
        vmov.i8     q11, #0

        tst         r2, #32
        beq         1f
  .if \lddst
        vld1.64     {d4-d7}, [r0]!
  .endif
  .if \ldsrc
        vld1.64     {d20-d23}, [r1]!
  .endif
1:      tst         r2, #16
        beq         1f
  .if \lddst
        vld1.64     {d2-d3}, [r0]!
  .endif
  .if \ldsrc
        vld1.64     {d18-d19}, [r1]!
  .endif
1:      tst         r2, #8
        beq         1f
  .if \lddst
        vld1.64     {d1}, [r0]!
  .endif
  .if \ldsrc
        vld1.64     {d17}, [r1]!
  .endif
1:      tst         r2, #4
        beq         1f
  .if \lddst
        vld1.32     {d0[1]}, [r0]!
  .endif
  .if \ldsrc
        vld1.32     {d16[1]}, [r1]!
  .endif
1:      tst         r2, #2
        beq         1f
  .if \lddst
        vld1.16     {d0[1]}, [r0]!
  .endif
  .if \ldsrc
        vld1.16     {d16[1]}, [r1]!
  .endif
1:      tst         r2, #1
        beq         1f
  .if \lddst
        vld1.8      {d0[1]}, [r0]!
  .endif
  .if \ldsrc
        vld1.8      {d16[1]}, [r1]!
  .endif
1:
  .if \lddst
        sub         r0, r2              /* rewind r0 for the tail stores */
  .endif

  .if \zipped
        /* The partial loads above cannot de-interleave bytes while filling
         * only part of a register, so the data was loaded linearly.  Two
         * rounds of vuzp.8 now sort it so that q0..q3 (and q8..q11) each hold
         * one byte position of every 4-byte element, and the vzip.8 sequence
         * after the kernel restores the linear order for the stores.
         */
        vuzp.8      q0, q1
        vuzp.8      q2, q3
        vuzp.8      q0, q2
        vuzp.8      q1, q3

        vuzp.8      q8, q9
        vuzp.8      q10, q11
        vuzp.8      q8, q10
        vuzp.8      q9, q11

        \kernel

        vzip.8      q0, q2
        vzip.8      q1, q3
        vzip.8      q0, q1
        vzip.8      q2, q3
  .else
        \kernel
  .endif

        /* Store the tail from the same register slices, in the same order. */
        tst         r2, #32
        beq         1f
        vst1.64     {d4-d7}, [r0]!
1:      tst         r2, #16
        beq         1f
        vst1.64     {d2-d3}, [r0]!
1:      tst         r2, #8
        beq         1f
        vst1.64     {d1}, [r0]!
1:      tst         r2, #4
        beq         1f
        vst1.32     {d0[1]}, [r0]!
1:      tst         r2, #2
        beq         1f
        vst1.16     {d0[1]}, [r0]!
1:      tst         r2, #1
        beq         2f
        vst1.8      {d0[1]}, [r0]!
2:      vpop        {d8-d15}
.endif
        mov         r0, #0
        bx          lr
.endm


/* produce list of blend_line_XX() functions; each function uses the wrap_line
 * macro, passing it the name of the operation macro it wants along with
 * optional parameters to remove unnecessary operations.
*/
#define BLEND_X(d, n) \
        ENTRY(blend_line_##n) ; \
        wrap_line blend_kernel_##n, params_##n ; \
        END(blend_line_##n) ;
        BLEND_LIST(BLEND_X)
#undef BLEND_X


/*  int rsdIntrinsicBlend_K(
 *          uchar4 *out,        // r0
 *          uchar4 const *in,   // r1
 *          int slot,           // r2
 *          size_t xstart,      // r3
 *          size_t xend);       // [sp]
 *
 * Looks slot up in the blend_functions table below.  When the slot has an
 * entry, control is transferred to the matching blend_line_XX function with
 *      r0 = out + xstart * 4
 *      r1 = in + xstart * 4
 *      r2 = (xend - xstart) * 4, the byte count
 * and that function returns 0 to the original caller.  When slot lies beyond
 * the table (the comparison is unsigned, so negative values qualify too) or
 * its entry is zero, nothing is processed and -1 is returned.
 */
ENTRY(rsdIntrinsicBlend_K)
        adr         ip, blend_functions
        cmp         r2, #(blend_functions_end - blend_functions) >> 2
        ldrlo       ip, [ip, r2, LSL #2]    /* in range: fetch the table entry */
        movhs       ip, #0                  /* out of range: same as no entry */
        ldr         r2, [sp]                /* slot is consumed; r2 = xend */
        add         r0, r0, r3, LSL #2
        add         r1, r1, r3, LSL #2
        sub         r2, r2, r3
        mov         r2, r2, LSL #2
        cmp         ip, #0
        /* Table entries are offsets from .Lblend_dispatch_base.  In ARM state
         * pc reads as the address of the current instruction plus 8, which is
         * exactly that label, so it must stay two instructions after the add.
         */
        addne       ip, ip, pc
        bxne        ip
.Lblend_dispatch_base:
        mvn         r0, #0                  /* r0 = -1: no kernel for slot */
        bx          lr

/* One word per slot number, starting at slot 0: the offset of blend_line_XX
 * from .Lblend_dispatch_base, or zero for slot numbers that BLEND_LIST skips.
 * "off" tracks the next slot number still to be emitted.
 */
blend_functions:
.set off,0
#define BLEND_X(d, n) \
        .rept d-off ; .word 0 ; .endr ; \
        .word blend_line_##n - .Lblend_dispatch_base ; \
        .set off, d+1 ;
        BLEND_LIST(BLEND_X)
#undef BLEND_X
blend_functions_end:

END(rsdIntrinsicBlend_K)