/*
 * Copyright (C) 2013-2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f) opens a global function symbol f in .text; END(f) records its
 * size.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
#define END(f) .size f, .-f;

/* X-macro list of the supported blend operations: X(slot, NAME). */
#define BLEND_LIST(X) \
    X(0, CLEAR) \
    X(1, SRC) \
    X(2, DST) \
    X(3, SRC_OVER) \
    X(4, DST_OVER) \
    X(5, SRC_IN) \
    X(6, DST_IN) \
    X(7, SRC_OUT) \
    X(8, DST_OUT) \
    X(9, SRC_ATOP) \
    X(10, DST_ATOP) \
    X(11, XOR) \
    X(14, MULTIPLY) \
    X(21, DIFFERENCE) \
    X(34, ADD) \
    X(35, SUBTRACT)

/* For every blend operation supported, define a macro with just the arithmetic
 * component.  The rest can be handled later on.
 *
 * At entry v0-v3 contain the RGBA data from the destination buffer, and v8-v11
 * contain the data from the source buffer.  Both have already been split out
 * into one colour component per register (if necessary).  v3 and v11 contain
 * the alpha components.  Every kernel leaves its result in v0-v3.
 *
 * At the same time as defining the assembly macro, define a corresponding
 * preprocessor macro indicating any other requirements.
 *    zipped=0 -- The macro does not require the RGBA components to be
 *                separated.
 *    lddst=0  -- The macro does not require data from the destination buffer.
 *    ldsrc=0  -- The macro does not require data from the source buffer.
 *    nowrap=1 -- The macro requires no wrapper at all, and should simply be
 *                inserted without any surrounding load/store or loop code.
 *
 * Several kernels scale an 8-bit value by an 8-bit alpha.  They form the
 * 16-bit product x, add the rounded value of (x >> 8) back into x, and then
 * narrow with another rounding shift by 8; this approximates x / 255.
 */

/* CLEAR: result = 0.  Needs neither source nor destination data. */
#define params_CLEAR zipped=0, lddst=0, ldsrc=0
.macro blend_kernel_CLEAR
        movi        v0.16b, #0
        movi        v1.16b, #0
        movi        v2.16b, #0
        movi        v3.16b, #0
.endm

/* SRC: result = src.  Destination data is not needed. */
#define params_SRC zipped=0, lddst=0
.macro blend_kernel_SRC
        mov         v0.16b, v8.16b
        mov         v1.16b, v9.16b
        mov         v2.16b, v10.16b
        mov         v3.16b, v11.16b
.endm

/* DST: result = dst, so there is nothing to do and no wrapper is wanted. */
#define params_DST nowrap=1
.macro blend_kernel_DST
        /* nop */
.endm

/* SRC_OVER: result = src + dst * (255 - src.a) / 255, saturating.
 * Scratch: v4-v7, v12-v15.
 */
#define params_SRC_OVER zipped=1
.macro blend_kernel_SRC_OVER
        mvn         v7.16b, v11.16b             /* v7 = 255 - src.a */

        umull2      v12.8h, v7.16b, v0.16b
        umull       v0.8h,  v7.8b,  v0.8b
        umull2      v13.8h, v7.16b, v1.16b
        umull       v1.8h,  v7.8b,  v1.8b
        umull2      v14.8h, v7.16b, v2.16b
        umull       v2.8h,  v7.8b,  v2.8b
        umull2      v15.8h, v7.16b, v3.16b
        umull       v3.8h,  v7.8b,  v3.8b

        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8

        uqadd       v0.16b, v0.16b, v8.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v3.16b, v3.16b, v11.16b
.endm

/* DST_OVER: result = dst + src * (255 - dst.a) / 255, saturating.
 * Overwrites v8-v11.  Scratch: v4-v7, v12-v15.
 */
#define params_DST_OVER zipped=1
.macro blend_kernel_DST_OVER
        mvn         v7.16b, v3.16b              /* v7 = 255 - dst.a */

        umull2      v12.8h, v7.16b, v8.16b
        umull       v8.8h,  v7.8b,  v8.8b
        umull2      v13.8h, v7.16b, v9.16b
        umull       v9.8h,  v7.8b,  v9.8b
        umull2      v14.8h, v7.16b, v10.16b
        umull       v10.8h, v7.8b,  v10.8b
        umull2      v15.8h, v7.16b, v11.16b
        umull       v11.8h, v7.8b,  v11.8b

        rshrn       v4.8b,  v8.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v9.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v10.8h, #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v11.8h, #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v8.8h,  v8.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v9.8h,  v9.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v10.8h, v10.8h, v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v11.8h, v11.8h, v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v8.8b,  v8.8h,  #8
        rshrn2      v8.16b, v12.8h, #8
        rshrn       v9.8b,  v9.8h,  #8
        rshrn2      v9.16b, v13.8h, #8
        rshrn       v10.8b, v10.8h, #8
        rshrn2      v10.16b, v14.8h, #8
        rshrn       v11.8b, v11.8h, #8
        rshrn2      v11.16b, v15.8h, #8

        uqadd       v0.16b, v0.16b, v8.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v3.16b, v3.16b, v11.16b
.endm

/* SRC_IN: result = src * v3 / 255, where v3 is the destination alpha on
 * entry (SRC_OUT substitutes its complement before expanding this macro).
 * Scratch: v4-v7, v12-v15.
 */
#define params_SRC_IN zipped=1
.macro blend_kernel_SRC_IN
        umull2      v12.8h, v3.16b, v8.16b
        umull       v0.8h,  v3.8b,  v8.8b
        umull2      v13.8h, v3.16b, v9.16b
        umull       v1.8h,  v3.8b,  v9.8b
        umull2      v14.8h, v3.16b, v10.16b
        umull       v2.8h,  v3.8b,  v10.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm

/* DST_IN: result = dst * v11 / 255, where v11 is the source alpha on entry
 * (DST_OUT substitutes its complement before expanding this macro).
 * Scratch: v4-v7, v12-v15.
 */
#define params_DST_IN zipped=1
.macro blend_kernel_DST_IN
        umull2      v12.8h, v0.16b, v11.16b
        umull       v0.8h,  v0.8b,  v11.8b
        umull2      v13.8h, v1.16b, v11.16b
        umull       v1.8h,  v1.8b,  v11.8b
        umull2      v14.8h, v2.16b, v11.16b
        umull       v2.8h,  v2.8b,  v11.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm

/* SRC_OUT: result = src * (255 - dst.a) / 255.  Complements the destination
 * alpha and reuses the SRC_IN arithmetic.
 */
#define params_SRC_OUT zipped=1
.macro blend_kernel_SRC_OUT
        mvn         v3.16b, v3.16b
        blend_kernel_SRC_IN
.endm


/* DST_OUT: result = dst * (255 - src.a) / 255.  Complements the source alpha
 * and reuses the DST_IN arithmetic.
 */
#define params_DST_OUT zipped=1
.macro blend_kernel_DST_OUT
        mvn         v11.16b, v11.16b
        blend_kernel_DST_IN
.endm

/* SRC_ATOP: colour = (dst * (255 - src.a) + src * dst.a) / 255.
 * The alpha register v3 is not written, so result alpha = dst.a.
 * Overwrites v8-v11.  Scratch: v4-v6, v12-v14.
 */
#define params_SRC_ATOP zipped=1
.macro blend_kernel_SRC_ATOP
        mvn         v11.16b, v11.16b            /* v11 = 255 - src.a */

        umull2      v12.8h, v11.16b, v0.16b
        umull       v0.8h,  v11.8b,  v0.8b
        umull2      v13.8h, v11.16b, v1.16b
        umull       v1.8h,  v11.8b,  v1.8b
        umull2      v14.8h, v11.16b, v2.16b
        umull       v2.8h,  v11.8b,  v2.8b

        umull2      v4.8h,  v3.16b, v8.16b
        umull       v8.8h,  v3.8b,  v8.8b
        umull2      v5.8h,  v3.16b, v9.16b
        umull       v9.8h,  v3.8b,  v9.8b
        umull2      v6.8h,  v3.16b, v10.16b
        umull       v10.8h, v3.8b,  v10.8b

        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v14.8h, v14.8h, v6.8h
        uqadd       v2.8h,  v2.8h,  v10.8h

        urshr       v8.8h,  v0.8h,  #8
        urshr       v4.8h,  v12.8h, #8
        urshr       v9.8h,  v1.8h,  #8
        urshr       v5.8h,  v13.8h, #8
        urshr       v10.8h, v2.8h,  #8
        urshr       v6.8h,  v14.8h, #8

        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v2.8h,  v2.8h,  v10.8h
        uqadd       v14.8h, v14.8h, v6.8h

        uqrshrn     v0.8b,  v0.8h,  #8
        uqrshrn2    v0.16b, v12.8h, #8
        uqrshrn     v1.8b,  v1.8h,  #8
        uqrshrn2    v1.16b, v13.8h, #8
        uqrshrn     v2.8b,  v2.8h,  #8
        uqrshrn2    v2.16b, v14.8h, #8
.endm

/* DST_ATOP: colour = (dst * src.a + src * (255 - dst.a)) / 255,
 * result alpha = src.a.
 * Overwrites v8-v10.  Scratch: v4-v6, v12-v14.
 */
#define params_DST_ATOP zipped=1
.macro blend_kernel_DST_ATOP
        mvn         v3.16b, v3.16b              /* v3 = 255 - dst.a */

        umull2      v12.8h, v11.16b, v0.16b
        umull       v0.8h,  v11.8b,  v0.8b
        umull2      v13.8h, v11.16b, v1.16b
        umull       v1.8h,  v11.8b,  v1.8b
        umull2      v14.8h, v11.16b, v2.16b
        umull       v2.8h,  v11.8b,  v2.8b

        umull2      v4.8h,  v3.16b, v8.16b
        umull       v8.8h,  v3.8b,  v8.8b
        umull2      v5.8h,  v3.16b, v9.16b
        umull       v9.8h,  v3.8b,  v9.8b
        umull2      v6.8h,  v3.16b, v10.16b
        umull       v10.8h, v3.8b,  v10.8b

        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v14.8h, v14.8h, v6.8h
        uqadd       v2.8h,  v2.8h,  v10.8h

        urshr       v8.8h,  v0.8h,  #8
        urshr       v4.8h,  v12.8h, #8
        urshr       v9.8h,  v1.8h,  #8
        urshr       v5.8h,  v13.8h, #8
        urshr       v10.8h, v2.8h,  #8
        urshr       v6.8h,  v14.8h, #8

        uqadd       v0.8h,  v0.8h,  v8.8h
        uqadd       v12.8h, v12.8h, v4.8h
        uqadd       v1.8h,  v1.8h,  v9.8h
        uqadd       v13.8h, v13.8h, v5.8h
        uqadd       v2.8h,  v2.8h,  v10.8h
        uqadd       v14.8h, v14.8h, v6.8h

        uqrshrn     v0.8b,  v0.8h,  #8
        uqrshrn2    v0.16b, v12.8h, #8
        uqrshrn     v1.8b,  v1.8h,  #8
        uqrshrn2    v1.16b, v13.8h, #8
        uqrshrn     v2.8b,  v2.8h,  #8
        uqrshrn2    v2.16b, v14.8h, #8

        /* DST_ATOP takes its alpha from the source.  v11 is still the
         * unmodified source alpha here; re-inverting v3 would wrongly hand
         * back the destination alpha.
         */
        mov         v3.16b, v11.16b
.endm

/* MULTIPLY: result = src * dst / 255, component by component.
 * Scratch: v4-v7, v12-v15.
 */
#define params_MULTIPLY zipped=0
.macro blend_kernel_MULTIPLY
        umull2      v12.8h, v0.16b, v8.16b
        umull       v0.8h,  v0.8b,  v8.8b
        umull2      v13.8h, v1.16b, v9.16b
        umull       v1.8h,  v1.8b,  v9.8b
        umull2      v14.8h, v2.16b, v10.16b
        umull       v2.8h,  v2.8b,  v10.8b
        umull2      v15.8h, v3.16b, v11.16b
        umull       v3.8h,  v3.8b,  v11.8b

        rshrn       v4.8b,  v0.8h,  #8
        rshrn2      v4.16b, v12.8h, #8
        rshrn       v5.8b,  v1.8h,  #8
        rshrn2      v5.16b, v13.8h, #8
        rshrn       v6.8b,  v2.8h,  #8
        rshrn2      v6.16b, v14.8h, #8
        rshrn       v7.8b,  v3.8h,  #8
        rshrn2      v7.16b, v15.8h, #8

        uaddw       v0.8h,  v0.8h,  v4.8b
        uaddw2      v12.8h, v12.8h, v4.16b
        uaddw       v1.8h,  v1.8h,  v5.8b
        uaddw2      v13.8h, v13.8h, v5.16b
        uaddw       v2.8h,  v2.8h,  v6.8b
        uaddw2      v14.8h, v14.8h, v6.16b
        uaddw       v3.8h,  v3.8h,  v7.8b
        uaddw2      v15.8h, v15.8h, v7.16b

        rshrn       v0.8b,  v0.8h,  #8
        rshrn2      v0.16b, v12.8h, #8
        rshrn       v1.8b,  v1.8h,  #8
        rshrn2      v1.16b, v13.8h, #8
        rshrn       v2.8b,  v2.8h,  #8
        rshrn2      v2.16b, v14.8h, #8
        rshrn       v3.8b,  v3.8h,  #8
        rshrn2      v3.16b, v15.8h, #8
.endm

/* ADD: result = dst + src, saturating per byte. */
#define params_ADD zipped=0
.macro blend_kernel_ADD
        uqadd       v0.16b, v0.16b, v8.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v3.16b, v3.16b, v11.16b
.endm

/* SUBTRACT: result = dst - src, saturating per byte. */
#define params_SUBTRACT zipped=0
.macro blend_kernel_SUBTRACT
        uqsub       v0.16b, v0.16b, v8.16b
        uqsub       v1.16b, v1.16b, v9.16b
        uqsub       v2.16b, v2.16b, v10.16b
        uqsub       v3.16b, v3.16b, v11.16b
.endm

/* DIFFERENCE: result = |dst - src| per byte. */
#define params_DIFFERENCE zipped=0
.macro blend_kernel_DIFFERENCE
        uabd        v0.16b, v0.16b, v8.16b
        uabd        v1.16b, v1.16b, v9.16b
        uabd        v2.16b, v2.16b, v10.16b
        uabd        v3.16b, v3.16b, v11.16b
.endm

/* XOR: result = dst ^ src (bitwise exclusive-or of the raw bytes). */
#define params_XOR zipped=0
.macro blend_kernel_XOR
        eor         v0.16b, v0.16b, v8.16b
        eor         v1.16b, v1.16b, v9.16b
        eor         v2.16b, v2.16b, v10.16b
        eor         v3.16b, v3.16b, v11.16b
.endm


/* Define the wrapper code which will load and store the data, iterate the
 * correct number of times, and safely handle the remainder at the end of the
 * loop.  Various sections of assembly code are dropped or substituted for
 * simpler operations if they're not needed.
*/
/* Macro arguments (all but the first are normally supplied through the
 * params_XX definitions):
 *   kernel  name of the macro holding the arithmetic for one operation
 *   nowrap  when non-zero, emit just the kernel followed by the common return
 *   zipped  when non-zero, use ld4/st4 so that each 4-byte element is split
 *           byte-wise across v0-v3 (data at x0) and v8-v11 (data at x1);
 *           when zero, plain ld1/st1 is used
 *   lddst   when zero, the data at x0 is not loaded before the kernel runs
 *   ldsrc   when zero, the data at x1 is not loaded
 *   pld     guards the prefetch hints, which are currently compiled out
 *
 * Register use in the wrapped form:
 *   x0      pointer to the data that is (optionally) read and always written
 *   x1      pointer to the data that is (optionally) read
 *   x2      number of bytes to process
 *   x3      scratch
 *   v8-v15  low 64 bits are saved on the stack on entry and restored on exit
 *
 * Every expansion finishes by returning 0 in x0.
 */
.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
.if \nowrap
            \kernel
.else
            /* Reserve 64 bytes of stack and save the low halves of v8-v15;
             * x3 addresses the upper 32 bytes of the reserved area.
             */
            sub         x3, sp, #32
            sub         sp, sp, #64
            st1         {v8.1d - v11.1d}, [sp]
            st1         {v12.1d - v15.1d}, [x3]

            /* Bias the byte count by one 64-byte block so the flags tested
             * at 2: say whether a whole block is still available.
             */
            subs        x2, x2, #64
            b           2f

.align 4
1:          /* Main loop: one 64-byte block per pass. */
  .if \zipped
    .if \lddst
            ld4         {v0.16b - v3.16b}, [x0]
    .endif
    .if \ldsrc
            ld4         {v8.16b - v11.16b}, [x1], #64
    .endif
  .else
    .if \lddst
            ld1         {v0.16b - v3.16b}, [x0]
    .endif
    .if \ldsrc
            ld1         {v8.16b - v11.16b}, [x1], #64
    .endif
  .endif
  .if \pld
#if 0 /* TODO: test this on real hardware */
    .if \lddst ; prfm PLDL1STRM, [x0, #192] ; .endif
    .if \ldsrc ; prfm PLDL1STRM, [x1, #192] ; .endif
#endif
  .endif

            \kernel

            /* Update the count first; the stores leave the flags alone, so
             * the branch at 2: still sees the result of this subtraction.
             */
            subs        x2, x2, #64
  .if \zipped
            st4         {v0.16b - v3.16b}, [x0], #64
  .else
            st1         {v0.16b - v3.16b}, [x0], #64
  .endif

2:          bge         1b
            /* Undo the bias; x2 is now the number of leftover bytes
             * (0..63).  Nothing left means go straight to the exit.
             */
            adds        x2, x2, #64
            beq         2f

            /* Tail: fewer than 64 bytes remain.  Start from zeroed working
             * registers, then pull in one power-of-two sized piece for every
             * bit set in x2.  Where a piece lands inside the registers is
             * unimportant because the stores further down use exactly the
             * same positions and the kernels never mix neighbouring
             * elements.
             */
            movi        v0.16b, #0
            movi        v1.16b, #0
            movi        v2.16b, #0
            movi        v3.16b, #0

            movi        v8.16b, #0
            movi        v9.16b, #0
            movi        v10.16b, #0
            movi        v11.16b, #0

            tbz         x2, #5, 1f
  .if \lddst
            ld1         {v2.16b,v3.16b}, [x0], #32
  .endif
  .if \ldsrc
            ld1         {v10.16b,v11.16b}, [x1], #32
  .endif
1:          tbz         x2, #4, 1f
  .if \lddst
            ld1         {v1.16b}, [x0], #16
  .endif
  .if \ldsrc
            ld1         {v9.16b}, [x1], #16
  .endif
1:          tbz         x2, #3, 1f
  .if \lddst
            ld1         {v0.d}[1], [x0], #8
  .endif
  .if \ldsrc
            ld1         {v8.d}[1], [x1], #8
  .endif
1:          tbz         x2, #2, 1f
  .if \lddst
            ld1         {v0.s}[1], [x0], #4
  .endif
  .if \ldsrc
            ld1         {v8.s}[1], [x1], #4
  .endif
1:          tbz         x2, #1, 1f
  .if \lddst
            ld1         {v0.h}[1], [x0], #2
  .endif
  .if \ldsrc
            ld1         {v8.h}[1], [x1], #2
  .endif
1:          tbz         x2, #0, 1f
  .if \lddst
            ld1         {v0.b}[1], [x0], #1
  .endif
  .if \ldsrc
            ld1         {v8.b}[1], [x1], #1
  .endif
1:
  .if \lddst
            /* The loads above advanced x0 by x2 bytes in total; step back
             * so that the stores below begin at the same address.
             */
            sub         x0, x0, x2
  .endif

  .if \zipped
            /* The partial loads above cannot de-interleave bytes the way
             * ld4 does, so the linearly loaded data is unpacked here with
             * two rounds of uzp1/uzp2 per register group (v4-v7 are used as
             * temporaries).
             */
            uzp1        v4.16b, v0.16b, v1.16b
            uzp2        v5.16b, v0.16b, v1.16b
            uzp1        v6.16b, v2.16b, v3.16b
            uzp2        v7.16b, v2.16b, v3.16b
            uzp1        v0.16b, v4.16b, v6.16b
            uzp2        v2.16b, v4.16b, v6.16b
            uzp1        v1.16b, v5.16b, v7.16b
            uzp2        v3.16b, v5.16b, v7.16b

            uzp1        v4.16b, v8.16b, v9.16b
            uzp2        v5.16b, v8.16b, v9.16b
            uzp1        v6.16b, v10.16b, v11.16b
            uzp2        v7.16b, v10.16b, v11.16b
            uzp1        v8.16b, v4.16b, v6.16b
            uzp2        v10.16b, v4.16b, v6.16b
            uzp1        v9.16b, v5.16b, v7.16b
            uzp2        v11.16b, v5.16b, v7.16b
  .endif

            \kernel

  .if \zipped
            /* Re-interleave the result into linear order for the stores. */
            zip1        v4.16b, v0.16b, v2.16b
            zip2        v6.16b, v0.16b, v2.16b
            zip1        v5.16b, v1.16b, v3.16b
            zip2        v7.16b, v1.16b, v3.16b
            zip1        v0.16b, v4.16b, v5.16b
            zip2        v1.16b, v4.16b, v5.16b
            zip1        v2.16b, v6.16b, v7.16b
            zip2        v3.16b, v6.16b, v7.16b
  .endif

            /* Write the pieces back out, mirroring the tail loads. */
            tbz         x2, #5, 1f
            st1         {v2.16b,v3.16b}, [x0], #32
1:          tbz         x2, #4, 1f
            st1         {v1.16b}, [x0], #16
1:          tbz         x2, #3, 1f
            st1         {v0.d}[1], [x0], #8
1:          tbz         x2, #2, 1f
            st1         {v0.s}[1], [x0], #4
1:          tbz         x2, #1, 1f
            st1         {v0.h}[1], [x0], #2
1:          tbz         x2, #0, 2f
            st1         {v0.b}[1], [x0], #1

            /* Exit: reload the saved halves of v8-v15 and release the 64
             * bytes of stack (two post-incremented loads of 32 bytes).
             */
2:          ld1         {v8.1d - v11.1d}, [sp], #32
            ld1         {v12.1d - v15.1d}, [sp], #32
.endif
            mov         x0, #0
            ret
.endm


/* Produce the list of blend_line_XX() functions.  Each function is one
 * expansion of the wrap_line macro, given the name of the arithmetic macro for
 * that operation along with the optional params_XX arguments that strip out
 * work the operation does not need.
 */
#define BLEND_X(d, n) ENTRY(blend_line_##n) ; wrap_line blend_kernel_##n, params_##n ; END(blend_line_##n) ;
    BLEND_LIST(BLEND_X)
#undef BLEND_X

/* tablesize ends up as (slot number of the last BLEND_LIST entry) + 1, which
 * is the number of halfword entries in the dispatch table emitted below.
 */
#define BLEND_X(d, n) .set tablesize, d+1 ;
    BLEND_LIST(BLEND_X)
#undef BLEND_X

/* int rsdIntrinsicBlend_K(
 *          uchar4 *out,        // x0
 *          uchar4 const *in,   // x1
 *          int slot,           // x2
 *          size_t xstart,      // x3
 *          size_t xend);       // x4
 *
 * Looks up slot in the table of 16-bit offsets that follows the code and
 * branches to the matching blend_line_XX function with
 *      x0 = out + xstart * 4
 *      x1 = in  + xstart * 4
 *      x2 = (xend - xstart) * 4        (a byte count)
 * The blend_line_XX function supplies the return value; -1 is returned here
 * when slot is outside the table or its table entry is zero (no function for
 * that slot).
 *
 * NOTE(review): only the low 32 bits of xstart and xend (w3/w4) are used,
 * although the prototype above says size_t -- confirm against the C
 * declaration.
 */
ENTRY(rsdIntrinsicBlend_K)
            adr         x5, 2f                  /* x5 = base of offset table */
            /* tablesize already counts table entries, so compare against it
             * directly.  Comparing against tablesize >> 1 rejected every slot
             * in the upper half of the table even though entries exist there.
             * The unsigned test also rejects negative slot values.
             */
            cmp         w2, tablesize
            bhs         1f
            ldrsh       x6, [x5, w2, uxtw #1]   /* signed offset from 2:, 0 = none */
            add         x0, x0, w3, uxtw #2
            add         x1, x1, w3, uxtw #2
            sub         w2, w4, w3
            ubfiz       x2, x2, #2, #32 /* TODO: fix */
            cbz         x6, 1f
            add         x6, x5, x6
            br          x6
1:          mov         x0, #-1
            ret

            /* Dispatch table: one halfword per slot, holding the distance
             * from 2: to blend_line_XX.  Slots with no BLEND_LIST entry are
             * filled with zero by the .rept padding.
             */
2:
.set off,0
#define BLEND_X(d, n) .rept d-off ; .hword 0 ; .endr ; .hword blend_line_##n - 2b ; .set off, d+1 ;
        BLEND_LIST(BLEND_X)
#undef BLEND_X
3:          /* end of the table */

END(rsdIntrinsicBlend_K)