/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Function-definition helpers.  ENTRY opens an exported function symbol,
 * PRIVATE opens a file-local one (same as ENTRY without the .globl), and END
 * records the symbol size.  All three are expanded by the C preprocessor, so
 * this file has to be assembled with preprocessing enabled.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
#define PRIVATE(f) .text; .align 4; .type f,#function; f:
#define END(f) .size f, .-f;

.set FRACTION_BITS, 7
.set MAX_R, 25


/* A quick way of making a line of code conditional on some other condition.
 * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with
 * `ifcc`:
 *
 * The whole remainder of the line is captured as a single vararg and is
 * emitted only while the assembler symbol `cc` is non-zero.
 */
.macro ifcc zzz:vararg
.if cc
            \zzz
.endif
.endm

/* Fetch 16 columns of bytes (regardless of image format), convolve these
 * vertically, and leave them in the register file.  If working near the top or
 * bottom of an image then clamp the addressing while loading the data in.
38446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 39446788007efe0a673d0366284026adfa17b36fedSimon Hosie * The convolution is fully unrolled for windows up to max_r, with the 40446788007efe0a673d0366284026adfa17b36fedSimon Hosie * outermost edges calculated first. This way it's possible to branch directly 41446788007efe0a673d0366284026adfa17b36fedSimon Hosie * into the relevant part of the code for an arbitrary convolution radius. Two 42446788007efe0a673d0366284026adfa17b36fedSimon Hosie * variants of the loop are produced; one eliminates the clamping code for a 43446788007efe0a673d0366284026adfa17b36fedSimon Hosie * slight speed advantage. 44446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 45446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Where the macro is called with reg=x, the specified register is taken to 46446788007efe0a673d0366284026adfa17b36fedSimon Hosie * contain a pre-calculated pointer into one of the two loops. 47446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 48446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input: 49446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x1 -- src 50446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x2 -- pitch 51446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x5 -- r 52446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x6 -- rup 53446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x7 -- rdn 54446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x12 -- switch index 559107db900edec122b8f6f49a81ab444d8a6c9c42Simon Hosie * v0-v3 -- coefficient table 56446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x13 = -pitch 57446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x15 = top-row in 58ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie * x19 = bottom-row in 59446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Output: 60446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x1 += 16 619107db900edec122b8f6f49a81ab444d8a6c9c42Simon Hosie * v10,v11 -- 16 convolved columns 
62446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies: 63446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x10 = upper row pointer 64446788007efe0a673d0366284026adfa17b36fedSimon Hosie * x11 = lower row pointer 659107db900edec122b8f6f49a81ab444d8a6c9c42Simon Hosie * v12-v15 = temporary sums 66446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 67446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=x12 /*{{{*/ 68446788007efe0a673d0366284026adfa17b36fedSimon Hosie .ifc \reg,x12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif 69446788007efe0a673d0366284026adfa17b36fedSimon Hosie 70446788007efe0a673d0366284026adfa17b36fedSimon Hosie ld1 {v15.16b}, [x1], #16 71446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov x10, x15 72446788007efe0a673d0366284026adfa17b36fedSimon Hosie 73446788007efe0a673d0366284026adfa17b36fedSimon Hosie uxtl v14.8h, v15.8b 74446788007efe0a673d0366284026adfa17b36fedSimon Hosie// prfm PLDL1KEEP,[x1, #16] // TODO: confirm 75446788007efe0a673d0366284026adfa17b36fedSimon Hosie uxtl2 v15.8h, v15.16b 76446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \max_r < 16 // approximate 77446788007efe0a673d0366284026adfa17b36fedSimon Hosie ifcc adr \reg, 1f 78446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 79446788007efe0a673d0366284026adfa17b36fedSimon Hosie ifcc adrp \reg, 1f 80446788007efe0a673d0366284026adfa17b36fedSimon Hosie ifcc add \reg, \reg, #:lo12:1f 81446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 82446788007efe0a673d0366284026adfa17b36fedSimon Hosie 83446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull v12.4s, v14.4h, v0.h[0] 84446788007efe0a673d0366284026adfa17b36fedSimon Hosie ifcc sub \reg, \reg, x5, LSL #6 85446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull2 v13.4s, v14.8h, v0.h[0] 86ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie mov x11, x19 87446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull v14.4s, v15.4h, v0.h[0] 
88446788007efe0a673d0366284026adfa17b36fedSimon Hosie ifcc add \reg, \reg, x5, LSL #3 89446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull2 v15.4s, v15.8h, v0.h[0] 90446788007efe0a673d0366284026adfa17b36fedSimon Hosie br \reg 91446788007efe0a673d0366284026adfa17b36fedSimon Hosie 92446788007efe0a673d0366284026adfa17b36fedSimon Hosie .irp rowclamp, 1, 0 93446788007efe0a673d0366284026adfa17b36fedSimon Hosie .set cc, \rowclamp 94446788007efe0a673d0366284026adfa17b36fedSimon Hosie .align 4 95446788007efe0a673d0366284026adfa17b36fedSimon Hosie .irp dreg, 4, 3, 2, 1, 0 ; .irp lane, 7, 6, 5, 4, 3, 2, 1, 0 ; .irp doth, .h 96446788007efe0a673d0366284026adfa17b36fedSimon Hosie .set i, \dreg * 8 + \lane 97446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if 0 < i && i <= \max_r 98446788007efe0a673d0366284026adfa17b36fedSimon Hosie ld1 {v10.16b}, [x10], x2 99446788007efe0a673d0366284026adfa17b36fedSimon Hosie ifcc cmp x6, #i 100446788007efe0a673d0366284026adfa17b36fedSimon Hosie ld1 {v11.16b}, [x11], x13 101446788007efe0a673d0366284026adfa17b36fedSimon Hosie ifcc csel x10, x15, x10, lo 102446788007efe0a673d0366284026adfa17b36fedSimon Hosie uaddl v16.8h, v10.8b, v11.8b 103446788007efe0a673d0366284026adfa17b36fedSimon Hosie ifcc cmp x7, #i 104446788007efe0a673d0366284026adfa17b36fedSimon Hosie uaddl2 v11.8h, v10.16b, v11.16b 105ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie ifcc csel x11, x19, x11, lo 106446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v12.4s, v16.4h, v\dreg\doth[\lane] 107446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v13.4s, v16.8h, v\dreg\doth[\lane] 108446788007efe0a673d0366284026adfa17b36fedSimon Hosie// prfm PLDL1KEEP,[x10, #32] // TODO: confirm 109446788007efe0a673d0366284026adfa17b36fedSimon Hosienop 110446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v11.4h, v\dreg\doth[\lane] 111446788007efe0a673d0366284026adfa17b36fedSimon Hosie// prfm PLDL1KEEP,[x11, #32] // TODO: confirm 
112446788007efe0a673d0366284026adfa17b36fedSimon Hosienop 113446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v11.8h, v\dreg\doth[\lane] 114446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 115446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endr ; .endr ; .endr 116446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \rowclamp == 1 117446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1: \labelc : 118446788007efe0a673d0366284026adfa17b36fedSimon Hosie b 2f 119446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 120446788007efe0a673d0366284026adfa17b36fedSimon Hosie 2: \labelnc : 121446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 122446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endr 123446788007efe0a673d0366284026adfa17b36fedSimon Hosie 124446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v10.4h, v12.4s, #16 - FRACTION_BITS 125446788007efe0a673d0366284026adfa17b36fedSimon Hosie add x15, x15, #16 126446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn2 v10.8h, v13.4s, #16 - FRACTION_BITS 127ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie add x19, x19, #16 128446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v11.4h, v14.4s, #16 - FRACTION_BITS 129446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn2 v11.8h, v15.4s, #16 - FRACTION_BITS 130446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm /*}}}*/ 131446788007efe0a673d0366284026adfa17b36fedSimon Hosie 132446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Some portion of the convolution window (as much as will fit, and all of it 133446788007efe0a673d0366284026adfa17b36fedSimon Hosie * for the uchar1 cases) is kept in the register file to avoid unnecessary 134446788007efe0a673d0366284026adfa17b36fedSimon Hosie * memory accesses. This forces the horizontal loops to be unrolled because 135446788007efe0a673d0366284026adfa17b36fedSimon Hosie * there's no indexed addressing into the register file. 
136446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 137446788007efe0a673d0366284026adfa17b36fedSimon Hosie * As in the fetch macro, the operations are ordered from outside to inside, so 138446788007efe0a673d0366284026adfa17b36fedSimon Hosie * that jumping into the middle of the block bypasses the unwanted window taps. 139446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 140446788007efe0a673d0366284026adfa17b36fedSimon Hosie * There are several variants of the macro because of the fixed offets of the 141446788007efe0a673d0366284026adfa17b36fedSimon Hosie * taps -- the wider the maximum radius the further the centre tap is from the 142446788007efe0a673d0366284026adfa17b36fedSimon Hosie * most recently fetched data. This means that pre-filling the window requires 143446788007efe0a673d0366284026adfa17b36fedSimon Hosie * more data that won't be used and it means that rotating the window involves 144446788007efe0a673d0366284026adfa17b36fedSimon Hosie * more mov operations. 145446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 146ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie * When the buffer gets too big the buffer at [x9] is used. 147446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 148446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input: 1499107db900edec122b8f6f49a81ab444d8a6c9c42Simon Hosie * v16-v31,v4-v11 -- convoltion window 150ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie * x9 -- pointer to additional convolution window data 151446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Output: 152ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie * x9 -- updated buffer pointer (if used) 153446788007efe0a673d0366284026adfa17b36fedSimon Hosie * d31 -- result to be stored 154446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies: 155ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie * x12 -- temp buffer pointer 1569107db900edec122b8f6f49a81ab444d8a6c9c42Simon Hosie * v12-v13 -- temporaries for load and vext operations. 
1579107db900edec122b8f6f49a81ab444d8a6c9c42Simon Hosie * v14-v15 -- intermediate sums 158446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 159446788007efe0a673d0366284026adfa17b36fedSimon Hosie#define TUNED_LIST1 8, 16 160446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv1_8/*{{{*/ 161446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull v14.4s, v9.4h, v0.h[0] 162446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull2 v15.4s, v9.8h, v0.h[0] 163446788007efe0a673d0366284026adfa17b36fedSimon Hosie 164ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie adr x16, 100f 165ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie ldrsh x12, [x16, x5, LSL #1] 166ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie add x12, x12, x16 167446788007efe0a673d0366284026adfa17b36fedSimon Hosie br x12 168ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie 100: .hword -4 169ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 101f-100b 170ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 102f-100b 171ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 103f-100b 172ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 104f-100b 173ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 105f-100b 174ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 106f-100b 175ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 107f-100b 176ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 108f-100b 177446788007efe0a673d0366284026adfa17b36fedSimon Hosie .align 4 178446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: umlal v14.4s, v8.4h, v1.h[0] 179446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v8.8h, v1.h[0] 180446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v10.4h, v1.h[0] 181446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v10.8h, v1.h[0] 182446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: ext v12.16b, v8.16b, v9.16b, #1*2 
183446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #7*2 184446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[7] 185446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[7] 186446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[7] 187446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[7] 188446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: ext v12.16b, v8.16b, v9.16b, #2*2 189446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #6*2 190446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[6] 191446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[6] 192446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[6] 193446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[6] 194446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: ext v12.16b, v8.16b, v9.16b, #3*2 195446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #5*2 196446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[5] 197446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[5] 198446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[5] 199446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[5] 200446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: //ext v12.16b, v8.16b, v9.16b, #4*2 201446788007efe0a673d0366284026adfa17b36fedSimon Hosie //ext v13.16b, v9.16b, v10.16b, #4*2 202446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v8.8h, v0.h[4] 203446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v9.4h, v0.h[4] 204446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v9.8h, v0.h[4] 205446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, 
v10.4h, v0.h[4] 206446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: ext v12.16b, v8.16b, v9.16b, #5*2 207446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #3*2 208446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[3] 209446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[3] 210446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[3] 211446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[3] 212446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: ext v12.16b, v8.16b, v9.16b, #6*2 213446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #2*2 214446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[2] 215446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[2] 216446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[2] 217446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[2] 218446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: ext v12.16b, v8.16b, v9.16b, #7*2 219446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #1*2 220446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[1] 221446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[1] 222446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[1] 223446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[1] 224446788007efe0a673d0366284026adfa17b36fedSimon Hosie 225446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v14.4h, v14.4s, #16 226446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn2 v14.8h, v15.4s, #16 227446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v15.8b, v14.8h, #FRACTION_BITS 228446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
229446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v8.16b, v9.16b 230446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v9.16b, v10.16b 231446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v10.16b, v11.16b 232446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 233446788007efe0a673d0366284026adfa17b36fedSimon Hosie 234446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv1_16/*{{{*/ 235446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull v14.4s, v8.4h, v0.h[0] 236446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull2 v15.4s, v8.8h, v0.h[0] 237446788007efe0a673d0366284026adfa17b36fedSimon Hosie 238ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie adr x16, 100f 239ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie ldrsh x12, [x16, x5, LSL #1] 240ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie add x12, x12, x16 241446788007efe0a673d0366284026adfa17b36fedSimon Hosie br x12 242ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie 100: .hword -4 243ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 101f-100b 244ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 102f-100b 245ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 103f-100b 246ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 104f-100b 247ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 105f-100b 248ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 106f-100b 249ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 107f-100b 250ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 108f-100b 251ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 109f-100b 252ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 110f-100b 253ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 111f-100b 254ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 112f-100b 255ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 113f-100b 256ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie 
.hword 114f-100b 257ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 115f-100b 258ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 116f-100b 259446788007efe0a673d0366284026adfa17b36fedSimon Hosie .align 4 260446788007efe0a673d0366284026adfa17b36fedSimon Hosie 116: //ext v12.16b, v6.16b, v7.16b, #0*2 261446788007efe0a673d0366284026adfa17b36fedSimon Hosie //ext v13.16b, v10.16b, v11.16b, #0*2 262446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v6.4h, v2.h[0] 263446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v6.8h, v2.h[0] 264446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v10.4h, v2.h[0] 265446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v10.8h, v2.h[0] 266446788007efe0a673d0366284026adfa17b36fedSimon Hosie 115: ext v12.16b, v6.16b, v7.16b, #1*2 267446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #7*2 268446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[7] 269446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[7] 270446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[7] 271446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[7] 272446788007efe0a673d0366284026adfa17b36fedSimon Hosie 114: ext v12.16b, v6.16b, v7.16b, #2*2 273446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #6*2 274446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[6] 275446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[6] 276446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[6] 277446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[6] 278446788007efe0a673d0366284026adfa17b36fedSimon Hosie 113: ext v12.16b, v6.16b, v7.16b, #3*2 279446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #5*2 
280446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[5] 281446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[5] 282446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[5] 283446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[5] 284446788007efe0a673d0366284026adfa17b36fedSimon Hosie 112: //ext v12.16b, v6.16b, v7.16b, #4*2 285446788007efe0a673d0366284026adfa17b36fedSimon Hosie //ext v13.16b, v9.16b, v10.16b, #4*2 286446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v6.8h, v1.h[4] 287446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v7.4h, v1.h[4] 288446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v9.8h, v1.h[4] 289446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v10.4h, v1.h[4] 290446788007efe0a673d0366284026adfa17b36fedSimon Hosie 111: ext v12.16b, v6.16b, v7.16b, #5*2 291446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #3*2 292446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[3] 293446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[3] 294446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[3] 295446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[3] 296446788007efe0a673d0366284026adfa17b36fedSimon Hosie 110: ext v12.16b, v6.16b, v7.16b, #6*2 297446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #2*2 298446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[2] 299446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[2] 300446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[2] 301446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[2] 302446788007efe0a673d0366284026adfa17b36fedSimon Hosie 109: ext v12.16b, 
v6.16b, v7.16b, #7*2 303446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #1*2 304446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[1] 305446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[1] 306446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[1] 307446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[1] 308446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: //ext v12.16b, v7.16b, v8.16b, #0*2 309446788007efe0a673d0366284026adfa17b36fedSimon Hosie //ext v13.16b, v9.16b, v10.16b, #0*2 310446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v7.4h, v1.h[0] 311446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v7.8h, v1.h[0] 312446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v9.4h, v1.h[0] 313446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v9.8h, v1.h[0] 314446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: ext v12.16b, v7.16b, v8.16b, #1*2 315446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #7*2 316446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[7] 317446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[7] 318446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[7] 319446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[7] 320446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: ext v12.16b, v7.16b, v8.16b, #2*2 321446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #6*2 322446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[6] 323446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[6] 324446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[6] 325446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
umlal2 v15.4s, v13.8h, v0.h[6] 326446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: ext v12.16b, v7.16b, v8.16b, #3*2 327446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #5*2 328446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[5] 329446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[5] 330446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[5] 331446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[5] 332446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: //ext v12.16b, v7.16b, v8.16b, #4*2 333446788007efe0a673d0366284026adfa17b36fedSimon Hosie //ext v13.16b, v8.16b, v9.16b, #4*2 334446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v7.8h, v0.h[4] 335446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v8.4h, v0.h[4] 336446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v8.8h, v0.h[4] 337446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v9.4h, v0.h[4] 338446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: ext v12.16b, v7.16b, v8.16b, #5*2 339446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #3*2 340446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[3] 341446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[3] 342446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[3] 343446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[3] 344446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: ext v12.16b, v7.16b, v8.16b, #6*2 345446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #2*2 346446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[2] 347446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[2] 
348446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[2] 349446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[2] 350446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: ext v12.16b, v7.16b, v8.16b, #7*2 351446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #1*2 352446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[1] 353446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[1] 354446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[1] 355446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[1] 356446788007efe0a673d0366284026adfa17b36fedSimon Hosie 357446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v14.4h, v14.4s, #16 358446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn2 v14.8h, v15.4s, #16 359446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v15.8b, v14.8h, #FRACTION_BITS 360446788007efe0a673d0366284026adfa17b36fedSimon Hosie 361446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v6.16b, v7.16b 362446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v7.16b, v8.16b 363446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v8.16b, v9.16b 364446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v9.16b, v10.16b 365446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v10.16b, v11.16b 366446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 367446788007efe0a673d0366284026adfa17b36fedSimon Hosie 368446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv1_25/*{{{*/ 369446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v12.16b, v6.16b, v7.16b, #7*2 370446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull v14.4s, v12.4h, v0.h[0] 371446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull2 v15.4s, v12.8h, v0.h[0] 372446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
373ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie adr x16, 100f 374ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie ldrsh x12, [x16, x5, LSL #1] 375ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie add x12, x12, x16 376446788007efe0a673d0366284026adfa17b36fedSimon Hosie br x12 377ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie 100: .hword -4 378ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 101f-100b 379ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 102f-100b 380ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 103f-100b 381ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 104f-100b 382ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 105f-100b 383ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 106f-100b 384ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 107f-100b 385ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 108f-100b 386ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 109f-100b 387ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 110f-100b 388ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 111f-100b 389ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 112f-100b 390ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 113f-100b 391ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 114f-100b 392ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 115f-100b 393ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 116f-100b 394ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 117f-100b 395ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 118f-100b 396ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 119f-100b 397ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 120f-100b 398ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 121f-100b 399ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 122f-100b 400ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie 
.hword 123f-100b 401ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 124f-100b 402ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 125f-100b 403446788007efe0a673d0366284026adfa17b36fedSimon Hosie .align 4 4044bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 125: ext v12.16b, v31.16b, v4.16b, #6*2 405446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v10.16b, v11.16b, #0*2 406446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v3.h[1] 407446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v3.h[1] 408446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v3.h[1] 409446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v3.h[1] 4109107db900edec122b8f6f49a81ab444d8a6c9c42Simon Hosie 124: ext v12.16b, v31.16b, v4.16b, #7*2 411446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #7*2 412446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v3.h[0] 413446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v3.h[0] 414446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v3.h[0] 415446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v3.h[0] 416446788007efe0a673d0366284026adfa17b36fedSimon Hosie 123: ext v12.16b, v4.16b, v5.16b, #0*2 417446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #6*2 418446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v2.h[7] 419446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v2.h[7] 420446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v2.h[7] 421446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v2.h[7] 422446788007efe0a673d0366284026adfa17b36fedSimon Hosie 122: ext v12.16b, v4.16b, v5.16b, #1*2 423446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #5*2 
424446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v2.h[6] 425446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v2.h[6] 426446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v2.h[6] 427446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v2.h[6] 428446788007efe0a673d0366284026adfa17b36fedSimon Hosie 121: ext v12.16b, v4.16b, v5.16b, #2*2 429446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #4*2 430446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v2.h[5] 431446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v2.h[5] 432446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v2.h[5] 433446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v2.h[5] 434446788007efe0a673d0366284026adfa17b36fedSimon Hosie 120: ext v12.16b, v4.16b, v5.16b, #3*2 435446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #3*2 436446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v2.h[4] 437446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v2.h[4] 438446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v2.h[4] 439446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v2.h[4] 440446788007efe0a673d0366284026adfa17b36fedSimon Hosie 119: ext v12.16b, v4.16b, v5.16b, #4*2 441446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #2*2 442446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v2.h[3] 443446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v2.h[3] 444446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v2.h[3] 445446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v2.h[3] 446446788007efe0a673d0366284026adfa17b36fedSimon Hosie 118: ext v12.16b, 
v4.16b, v5.16b, #5*2 447446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #1*2 448446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v2.h[2] 449446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v2.h[2] 450446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v2.h[2] 451446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v2.h[2] 452446788007efe0a673d0366284026adfa17b36fedSimon Hosie 117: ext v12.16b, v4.16b, v5.16b, #6*2 453446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v9.16b, v10.16b, #0*2 454446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v2.h[1] 455446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v2.h[1] 456446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v2.h[1] 457446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v2.h[1] 458446788007efe0a673d0366284026adfa17b36fedSimon Hosie 116: ext v12.16b, v4.16b, v5.16b, #7*2 459446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #7*2 460446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v2.h[0] 461446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v2.h[0] 462446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v2.h[0] 463446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v2.h[0] 464446788007efe0a673d0366284026adfa17b36fedSimon Hosie 115: ext v12.16b, v5.16b, v6.16b, #0*2 465446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #6*2 466446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[7] 467446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[7] 468446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[7] 469446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
umlal2 v15.4s, v13.8h, v1.h[7] 470446788007efe0a673d0366284026adfa17b36fedSimon Hosie 114: ext v12.16b, v5.16b, v6.16b, #1*2 471446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #5*2 472446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[6] 473446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[6] 474446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[6] 475446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[6] 476446788007efe0a673d0366284026adfa17b36fedSimon Hosie 113: ext v12.16b, v5.16b, v6.16b, #2*2 477446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #4*2 478446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[5] 479446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[5] 480446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[5] 481446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[5] 482446788007efe0a673d0366284026adfa17b36fedSimon Hosie 112: ext v12.16b, v5.16b, v6.16b, #3*2 483446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #3*2 484446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[4] 485446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[4] 486446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[4] 487446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[4] 488446788007efe0a673d0366284026adfa17b36fedSimon Hosie 111: ext v12.16b, v5.16b, v6.16b, #4*2 489446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #2*2 490446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[3] 491446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[3] 
492446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[3] 493446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[3] 494446788007efe0a673d0366284026adfa17b36fedSimon Hosie 110: ext v12.16b, v5.16b, v6.16b, #5*2 495446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #1*2 496446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[2] 497446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[2] 498446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[2] 499446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[2] 500446788007efe0a673d0366284026adfa17b36fedSimon Hosie 109: ext v12.16b, v5.16b, v6.16b, #6*2 501446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v8.16b, v9.16b, #0*2 502446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[1] 503446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[1] 504446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[1] 505446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[1] 506446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: ext v12.16b, v5.16b, v6.16b, #7*2 507446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v7.16b, v8.16b, #7*2 508446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v1.h[0] 509446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v1.h[0] 510446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v1.h[0] 511446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v1.h[0] 512446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: ext v12.16b, v6.16b, v7.16b, #0*2 513446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v7.16b, v8.16b, #6*2 514446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, 
v12.4h, v0.h[7] 515446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[7] 516446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[7] 517446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[7] 518446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: ext v12.16b, v6.16b, v7.16b, #1*2 519446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v7.16b, v8.16b, #5*2 520446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[6] 521446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[6] 522446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[6] 523446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[6] 524446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: ext v12.16b, v6.16b, v7.16b, #2*2 525446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v7.16b, v8.16b, #4*2 526446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[5] 527446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[5] 528446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[5] 529446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[5] 530446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: ext v12.16b, v6.16b, v7.16b, #3*2 531446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v7.16b, v8.16b, #3*2 532446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[4] 533446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[4] 534446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[4] 535446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[4] 536446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: ext v12.16b, v6.16b, v7.16b, #4*2 537446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
ext v13.16b, v7.16b, v8.16b, #2*2 538446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[3] 539446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[3] 540446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[3] 541446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[3] 542446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: ext v12.16b, v6.16b, v7.16b, #5*2 543446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v7.16b, v8.16b, #1*2 544446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[2] 545446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[2] 546446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[2] 547446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[2] 548446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: ext v12.16b, v6.16b, v7.16b, #6*2 549446788007efe0a673d0366284026adfa17b36fedSimon Hosie ext v13.16b, v7.16b, v8.16b, #0*2 550446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v12.4h, v0.h[1] 551446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v12.8h, v0.h[1] 552446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v13.4h, v0.h[1] 553446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v13.8h, v0.h[1] 554446788007efe0a673d0366284026adfa17b36fedSimon Hosie 555446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v14.4h, v14.4s, #16 556446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn2 v14.8h, v15.4s, #16 557446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v15.8b, v14.8h, #FRACTION_BITS 558446788007efe0a673d0366284026adfa17b36fedSimon Hosie 5594bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v31.16b, v4.16b 560446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v4.16b, v5.16b 561446788007efe0a673d0366284026adfa17b36fedSimon 
Hosie mov v5.16b, v6.16b 562446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v6.16b, v7.16b 563446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v7.16b, v8.16b 564446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v8.16b, v9.16b 565446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v9.16b, v10.16b 566446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v10.16b, v11.16b 567446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 568446788007efe0a673d0366284026adfa17b36fedSimon Hosie 5694bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie#define TUNED_LIST4 6, 12, 20 570446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_6/*{{{*/ 571446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull v14.4s, v7.4h, v0.h[0] 572446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull2 v15.4s, v7.8h, v0.h[0] 573446788007efe0a673d0366284026adfa17b36fedSimon Hosie 574ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie adr x16, 100f 575ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie ldrsh x12, [x16, x5, LSL #1] 576ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie add x12, x12, x16 577446788007efe0a673d0366284026adfa17b36fedSimon Hosie br x12 578ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie 100: .hword -4 579ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 101f-100b 580ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 102f-100b 581ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 103f-100b 582ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 104f-100b 583ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 105f-100b 584ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 106f-100b 585446788007efe0a673d0366284026adfa17b36fedSimon Hosie .align 4 586446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: umlal v14.4s, v4.4h, v0.h[6] 587446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v4.8h, v0.h[6] 588446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal 
v14.4s, v10.4h, v0.h[6] 589446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v10.8h, v0.h[6] 590446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: umlal2 v14.4s, v4.8h, v0.h[5] 591446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v5.4h, v0.h[5] 592446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v9.8h, v0.h[5] 593446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v10.4h, v0.h[5] 594446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: umlal v14.4s, v5.4h, v0.h[4] 595446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v5.8h, v0.h[4] 596446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v9.4h, v0.h[4] 597446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v9.8h, v0.h[4] 598446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: umlal2 v14.4s, v5.8h, v0.h[3] 599446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v6.4h, v0.h[3] 600446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v8.8h, v0.h[3] 601446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v9.4h, v0.h[3] 602446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: umlal v14.4s, v6.4h, v0.h[2] 603446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v6.8h, v0.h[2] 604446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v8.4h, v0.h[2] 605446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v8.8h, v0.h[2] 606446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: umlal2 v14.4s, v6.8h, v0.h[1] 607446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v7.4h, v0.h[1] 608446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v7.8h, v0.h[1] 609446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v8.4h, v0.h[1] 610446788007efe0a673d0366284026adfa17b36fedSimon Hosie 611446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v14.4h, v14.4s, #16 
612446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn2 v14.8h, v15.4s, #16 613446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v15.8b, v14.8h, #FRACTION_BITS 614446788007efe0a673d0366284026adfa17b36fedSimon Hosie 615446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v4.16b, v5.16b 616446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v5.16b, v6.16b 617446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v6.16b, v7.16b 618446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v7.16b, v8.16b 619446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v8.16b, v9.16b 620446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v9.16b, v10.16b 621446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov v10.16b, v11.16b 622446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 623446788007efe0a673d0366284026adfa17b36fedSimon Hosie 624446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_12/*{{{*/ 625446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull v14.4s, v4.4h, v0.h[0] 626446788007efe0a673d0366284026adfa17b36fedSimon Hosie umull2 v15.4s, v4.8h, v0.h[0] 627446788007efe0a673d0366284026adfa17b36fedSimon Hosie 628ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie adr x16, 100f 629ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie ldrsh x12, [x16, x5, LSL #1] 630ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie add x12, x12, x16 631446788007efe0a673d0366284026adfa17b36fedSimon Hosie br x12 632ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie 100: .hword -4 633ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 101f-100b 634ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 102f-100b 635ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 103f-100b 636ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 104f-100b 637ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 105f-100b 638ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 106f-100b 
639ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 107f-100b 640ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 108f-100b 641ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 109f-100b 642ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 110f-100b 643ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 111f-100b 644ea76eb386a2d851d50be69ebeb7ae593f84a5be9Simon Hosie .hword 112f-100b 645446788007efe0a673d0366284026adfa17b36fedSimon Hosie .align 4 6464bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 112: umlal v14.4s, v26.4h, v1.h[4] 6474bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v26.8h, v1.h[4] 648446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v10.4h, v1.h[4] 649446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v10.8h, v1.h[4] 6504bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 111: umlal2 v14.4s, v26.8h, v1.h[3] 6514bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v27.4h, v1.h[3] 652446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v9.8h, v1.h[3] 653446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v10.4h, v1.h[3] 6544bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 110: umlal v14.4s, v27.4h, v1.h[2] 6554bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v27.8h, v1.h[2] 656446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v9.4h, v1.h[2] 657446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v9.8h, v1.h[2] 6584bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 109: umlal2 v14.4s, v27.8h, v1.h[1] 6594bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v28.4h, v1.h[1] 660446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v8.8h, v1.h[1] 661446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v9.4h, v1.h[1] 6624bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 108: umlal v14.4s, v28.4h, v1.h[0] 
6634bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v28.8h, v1.h[0] 664446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v8.4h, v1.h[0] 665446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v8.8h, v1.h[0] 6664bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 107: umlal2 v14.4s, v28.8h, v0.h[7] 6674bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v29.4h, v0.h[7] 668446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v7.8h, v0.h[7] 669446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v8.4h, v0.h[7] 6704bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 106: umlal v14.4s, v29.4h, v0.h[6] 6714bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v29.8h, v0.h[6] 672446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v7.4h, v0.h[6] 673446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v7.8h, v0.h[6] 6744bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 105: umlal2 v14.4s, v29.8h, v0.h[5] 6754bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v30.4h, v0.h[5] 676446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v6.8h, v0.h[5] 677446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v7.4h, v0.h[5] 6784bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 104: umlal v14.4s, v30.4h, v0.h[4] 6794bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v30.8h, v0.h[4] 680446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v6.4h, v0.h[4] 681446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v6.8h, v0.h[4] 6824bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 103: umlal2 v14.4s, v30.8h, v0.h[3] 6834bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v31.4h, v0.h[3] 684446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v5.8h, v0.h[3] 685446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v6.4h, v0.h[3] 
6864bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 102: umlal v14.4s, v31.4h, v0.h[2] 6874bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v31.8h, v0.h[2] 688446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v14.4s, v5.4h, v0.h[2] 689446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v15.4s, v5.8h, v0.h[2] 6904bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 101: umlal2 v14.4s, v31.8h, v0.h[1] 691446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v4.4h, v0.h[1] 692446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal2 v14.4s, v4.8h, v0.h[1] 693446788007efe0a673d0366284026adfa17b36fedSimon Hosie umlal v15.4s, v5.4h, v0.h[1] 694446788007efe0a673d0366284026adfa17b36fedSimon Hosie 695446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v14.4h, v14.4s, #16 696446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn2 v14.8h, v15.4s, #16 697446788007efe0a673d0366284026adfa17b36fedSimon Hosie uqrshrn v15.8b, v14.8h, #FRACTION_BITS 698446788007efe0a673d0366284026adfa17b36fedSimon Hosie 6994bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v26.16b, v27.16b 7004bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v27.16b, v28.16b 7014bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v28.16b, v29.16b 7024bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v29.16b, v30.16b 7034bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v30.16b, v31.16b 7044bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v31.16b, v4.16b 7054bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v4.16b, v5.16b 7064bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v5.16b, v6.16b 7074bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v6.16b, v7.16b 7084bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v7.16b, v8.16b 7094bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v8.16b, v9.16b 7104bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v9.16b, v10.16b 
7114bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v10.16b, v11.16b 7124bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie.endm/*}}}*/ 7134bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 7144bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie.macro hconv4_20/*{{{*/ 7154bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umull v14.4s, v28.4h, v0.h[0] 7164bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umull2 v15.4s, v28.8h, v0.h[0] 7174bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 7184bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie adr x16, 100f 7194bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie ldrsh x12, [x16, x5, LSL #1] 7204bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie add x12, x12, x16 7214bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie br x12 7224bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 100: .hword -4 7234bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 101f-100b 7244bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 102f-100b 7254bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 103f-100b 7264bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 104f-100b 7274bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 105f-100b 7284bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 106f-100b 7294bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 107f-100b 7304bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 108f-100b 7314bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 109f-100b 7324bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 110f-100b 7334bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 111f-100b 7344bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 112f-100b 7354bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 113f-100b 7364bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 114f-100b 7374bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 115f-100b 7384bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 
.hword 116f-100b 7394bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 117f-100b 7404bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 118f-100b 7414bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 119f-100b 7424bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .hword 120f-100b 7434bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie .align 4 7444bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 7454bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 120: umlal v14.4s, v18.4h, v2.h[4] 7464bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v18.8h, v2.h[4] 7474bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v10.4h, v2.h[4] 7484bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v10.8h, v2.h[4] 7494bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 119: umlal2 v14.4s, v18.8h, v2.h[3] 7504bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v19.4h, v2.h[3] 7514bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v9.8h, v2.h[3] 7524bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v10.4h, v2.h[3] 7534bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 118: umlal v14.4s, v19.4h, v2.h[2] 7544bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v19.8h, v2.h[2] 7554bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v9.4h, v2.h[2] 7564bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v9.8h, v2.h[2] 7574bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 117: umlal2 v14.4s, v19.8h, v2.h[1] 7584bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v20.4h, v2.h[1] 7594bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v8.8h, v2.h[1] 7604bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v9.4h, v2.h[1] 7614bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 116: umlal v14.4s, v20.4h, v2.h[0] 7624bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v20.8h, v2.h[0] 
7634bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v8.4h, v2.h[0] 7644bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v8.8h, v2.h[0] 7654bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 115: umlal2 v14.4s, v20.8h, v1.h[7] 7664bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v21.4h, v1.h[7] 7674bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v7.8h, v1.h[7] 7684bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v8.4h, v1.h[7] 7694bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 114: umlal v14.4s, v21.4h, v1.h[6] 7704bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v21.8h, v1.h[6] 7714bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v7.4h, v1.h[6] 7724bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v7.8h, v1.h[6] 7734bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 113: umlal2 v14.4s, v21.8h, v1.h[5] 7744bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v22.4h, v1.h[5] 7754bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v6.8h, v1.h[5] 7764bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v7.4h, v1.h[5] 7774bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 112: umlal v14.4s, v22.4h, v1.h[4] 7784bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v22.8h, v1.h[4] 7794bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v6.4h, v1.h[4] 7804bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v6.8h, v1.h[4] 7814bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 111: umlal2 v14.4s, v22.8h, v1.h[3] 7824bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v23.4h, v1.h[3] 7834bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v5.8h, v1.h[3] 7844bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v6.4h, v1.h[3] 7854bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 110: umlal v14.4s, v23.4h, v1.h[2] 
7864bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v23.8h, v1.h[2] 7874bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v5.4h, v1.h[2] 7884bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v5.8h, v1.h[2] 7894bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 109: umlal2 v14.4s, v23.8h, v1.h[1] 7904bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v24.4h, v1.h[1] 7914bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v4.8h, v1.h[1] 7924bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v5.4h, v1.h[1] 7934bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 108: umlal v14.4s, v24.4h, v1.h[0] 7944bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v24.8h, v1.h[0] 7954bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v4.4h, v1.h[0] 7964bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v4.8h, v1.h[0] 7974bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 107: umlal2 v14.4s, v24.8h, v0.h[7] 7984bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v25.4h, v0.h[7] 7994bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v31.8h, v0.h[7] 8004bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v4.4h, v0.h[7] 8014bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 106: umlal v14.4s, v25.4h, v0.h[6] 8024bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v25.8h, v0.h[6] 8034bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v31.4h, v0.h[6] 8044bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v31.8h, v0.h[6] 8054bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 105: umlal2 v14.4s, v25.8h, v0.h[5] 8064bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v26.4h, v0.h[5] 8074bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v30.8h, v0.h[5] 8084bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v31.4h, v0.h[5] 
8094bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 104: umlal v14.4s, v26.4h, v0.h[4] 8104bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v26.8h, v0.h[4] 8114bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v30.4h, v0.h[4] 8124bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v30.8h, v0.h[4] 8134bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 103: umlal2 v14.4s, v26.8h, v0.h[3] 8144bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v27.4h, v0.h[3] 8154bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v29.8h, v0.h[3] 8164bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v30.4h, v0.h[3] 8174bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 102: umlal v14.4s, v27.4h, v0.h[2] 8184bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v27.8h, v0.h[2] 8194bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v14.4s, v29.4h, v0.h[2] 8204bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v15.4s, v29.8h, v0.h[2] 8214bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 101: umlal2 v14.4s, v27.8h, v0.h[1] 8224bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v28.4h, v0.h[1] 8234bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal2 v14.4s, v28.8h, v0.h[1] 8244bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie umlal v15.4s, v29.4h, v0.h[1] 8254bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 8264bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie uqrshrn v14.4h, v14.4s, #16 8274bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie uqrshrn2 v14.8h, v15.4s, #16 8284bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie uqrshrn v15.8b, v14.8h, #FRACTION_BITS 8294bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie 8304bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v18.16b, v19.16b 8314bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v19.16b, v20.16b 8324bea0d3b51fcdd9976af72c553a4a1d492016ca2Simon Hosie mov v20.16b, v21.16b 
/* (closing register shifts of the preceding macro, reproduced unchanged) */
            mov         v21.16b, v22.16b
            mov         v22.16b, v23.16b
            mov         v23.16b, v24.16b
            mov         v24.16b, v25.16b
            mov         v25.16b, v26.16b
            mov         v26.16b, v27.16b
            mov         v27.16b, v28.16b
            mov         v28.16b, v29.16b
            mov         v29.16b, v30.16b
            mov         v30.16b, v31.16b
            mov         v31.16b, v4.16b
            mov         v4.16b, v5.16b
            mov         v5.16b, v6.16b
            mov         v6.16b, v7.16b
            mov         v7.16b, v8.16b
            mov         v8.16b, v9.16b
            mov         v9.16b, v10.16b
            mov         v10.16b, v11.16b
.endm/*}}}*/

/* hconv4_25: one horizontal convolution step with up to 25 taps per side,
 * where each tap is four 16-bit lanes (half a vector register) wide.
 * Presumably this is the four-channel variant, going by the name -- confirm
 * against the call sites.
 *
 * The window is held in v17..v31 followed by v4..v10, with v11 as the
 * register shifted in at the end.  The centre of the window is the upper
 * half of v25 (accumulated into v14) and the lower half of v26 (accumulated
 * into v15).  Tap k adds, for each accumulator, the half-register k places
 * to the left and k places to the right of its centre, both multiplied by
 * coefficient lane k of the sequence v0.h[0..7], v1.h[0..7], v2.h[0..7],
 * v3.h[0..1].
 *
 * Input:
 *      x5 -- number of taps per side; used as the index into the jump table
 *      x9 -- pointer into the spill area holding the columns that precede v17
 *      v0-v3 -- coefficients, 16 bits each
 *      v17-v31, v4-v11 -- window data, 16 bits per lane
 * Output:
 *      v15.8b -- eight results, narrowed by 16 and then FRACTION_BITS bits
 *                with rounding and unsigned saturation
 *      x9 -- advanced by 16 with bit 6 cleared
 *      window shifted along by one register (v17 <- v18 ... v10 <- v11), the
 *      old v17 having been stored at the previous x9
 * Modifies:
 *      x12, x16, v12, v13, v14
 *
 * NOTE(review): table entry 0 is -4, which resolves to the address of the
 * br instruction itself, so x5 == 0 would branch to itself forever.
 * Presumably callers guarantee x5 >= 1 -- confirm.
 * NOTE(review): the bic #0x40 wrap only forms a 64-byte ring if the spill
 * area sits in the lower half of a 128-byte aligned region -- confirm where
 * x9 is set up.
 */
.macro hconv4_25/*{{{*/
            /* centre tap */
            umull2      v14.4s, v25.8h, v0.h[0]
            umull       v15.4s, v26.4h, v0.h[0]

            /* Computed branch: the table holds offsets relative to label 100,
             * and execution falls through from the selected tap down to 101.
             */
            adr         x16, 100f
            ldrsh       x12, [x16, x5, LSL #1]
            add         x12, x12, x16
            br          x12
   100:     .hword -4
            .hword 101f-100b
            .hword 102f-100b
            .hword 103f-100b
            .hword 104f-100b
            .hword 105f-100b
            .hword 106f-100b
            .hword 107f-100b
            .hword 108f-100b
            .hword 109f-100b
            .hword 110f-100b
            .hword 111f-100b
            .hword 112f-100b
            .hword 113f-100b
            .hword 114f-100b
            .hword 115f-100b
            .hword 116f-100b
            .hword 117f-100b
            .hword 118f-100b
            .hword 119f-100b
            .hword 120f-100b
            .hword 121f-100b
            .hword 122f-100b
            .hword 123f-100b
            .hword 124f-100b
            .hword 125f-100b
            .align      4

            /* Taps 125..118: the left-hand operands lie before v17, so they
             * are loaded from memory at x9 plus an offset, clearing bit 6 of
             * the address before every load.
             */
    125:    ld1         {v12.8h}, [x9]
            umlal       v14.4s, v12.4h, v3.h[1]
            umlal2      v15.4s, v12.8h, v3.h[1]
            umlal       v14.4s, v10.4h, v3.h[1]
            umlal2      v15.4s, v10.8h, v3.h[1]
    124:    add         x12, x9, #0x08
            bic         x12, x12, #0x40
            ld1         {v12.4h}, [x12], #8
            bic         x12, x12, #0x40
            ld1         {v13.4h}, [x12]
            umlal       v14.4s, v12.4h, v3.h[0]
            umlal       v15.4s, v13.4h, v3.h[0]
            umlal2      v14.4s, v9.8h, v3.h[0]
            umlal       v15.4s, v10.4h, v3.h[0]
    123:    add         x12, x9, #0x10
            bic         x12, x12, #0x40
            ld1         {v12.8h}, [x12]
            umlal       v14.4s, v12.4h, v2.h[7]
            umlal2      v15.4s, v12.8h, v2.h[7]
            umlal       v14.4s, v9.4h, v2.h[7]
            umlal2      v15.4s, v9.8h, v2.h[7]
    122:    add         x12, x9, #0x18
            bic         x12, x12, #0x40
            ld1         {v12.4h}, [x12], #8
            bic         x12, x12, #0x40
            ld1         {v13.4h}, [x12]
            umlal       v14.4s, v12.4h, v2.h[6]
            umlal       v15.4s, v13.4h, v2.h[6]
            umlal2      v14.4s, v8.8h, v2.h[6]
            umlal       v15.4s, v9.4h, v2.h[6]
    121:    add         x12, x9, #0x20
            bic         x12, x12, #0x40
            ld1         {v12.8h}, [x12]
            umlal       v14.4s, v12.4h, v2.h[5]
            umlal2      v15.4s, v12.8h, v2.h[5]
            umlal       v14.4s, v8.4h, v2.h[5]
            umlal2      v15.4s, v8.8h, v2.h[5]
    120:    add         x12, x9, #0x28
            bic         x12, x12, #0x40
            ld1         {v12.4h}, [x12], #8
            bic         x12, x12, #0x40
            ld1         {v13.4h}, [x12]
            umlal       v14.4s, v12.4h, v2.h[4]
            umlal       v15.4s, v13.4h, v2.h[4]
            umlal2      v14.4s, v7.8h, v2.h[4]
            umlal       v15.4s, v8.4h, v2.h[4]
    119:    add         x12, x9, #0x30
            bic         x12, x12, #0x40
            ld1         {v12.8h}, [x12]
            umlal       v14.4s, v12.4h, v2.h[3]
            umlal2      v15.4s, v12.8h, v2.h[3]
            umlal       v14.4s, v7.4h, v2.h[3]
            umlal2      v15.4s, v7.8h, v2.h[3]
    118:    add         x12, x9, #0x38
            bic         x12, x12, #0x40
            ld1         {v12.4h}, [x12]
            umlal       v14.4s, v12.4h, v2.h[2]
            umlal       v15.4s, v17.4h, v2.h[2]
            umlal2      v14.4s, v6.8h, v2.h[2]
            umlal       v15.4s, v7.4h, v2.h[2]
            /* Taps 117..101: both sides come from the register file. */
    117:    umlal       v14.4s, v17.4h, v2.h[1]
            umlal2      v15.4s, v17.8h, v2.h[1]
            umlal       v14.4s, v6.4h, v2.h[1]
            umlal2      v15.4s, v6.8h, v2.h[1]
    116:    umlal2      v14.4s, v17.8h, v2.h[0]
            umlal       v15.4s, v18.4h, v2.h[0]
            umlal2      v14.4s, v5.8h, v2.h[0]
            umlal       v15.4s, v6.4h, v2.h[0]
    115:    umlal       v14.4s, v18.4h, v1.h[7]
            umlal2      v15.4s, v18.8h, v1.h[7]
            umlal       v14.4s, v5.4h, v1.h[7]
            umlal2      v15.4s, v5.8h, v1.h[7]
    114:    umlal2      v14.4s, v18.8h, v1.h[6]
            umlal       v15.4s, v19.4h, v1.h[6]
            umlal2      v14.4s, v4.8h, v1.h[6]
            umlal       v15.4s, v5.4h, v1.h[6]
    113:    umlal       v14.4s, v19.4h, v1.h[5]
            umlal2      v15.4s, v19.8h, v1.h[5]
            umlal       v14.4s, v4.4h, v1.h[5]
            umlal2      v15.4s, v4.8h, v1.h[5]
    112:    umlal2      v14.4s, v19.8h, v1.h[4]
            umlal       v15.4s, v20.4h, v1.h[4]
            umlal2      v14.4s, v31.8h, v1.h[4]
            umlal       v15.4s, v4.4h, v1.h[4]
    111:    umlal       v14.4s, v20.4h, v1.h[3]
            umlal2      v15.4s, v20.8h, v1.h[3]
            umlal       v14.4s, v31.4h, v1.h[3]
            umlal2      v15.4s, v31.8h, v1.h[3]
    110:    umlal2      v14.4s, v20.8h, v1.h[2]
            umlal       v15.4s, v21.4h, v1.h[2]
            umlal2      v14.4s, v30.8h, v1.h[2]
            umlal       v15.4s, v31.4h, v1.h[2]
    109:    umlal       v14.4s, v21.4h, v1.h[1]
            umlal2      v15.4s, v21.8h, v1.h[1]
            umlal       v14.4s, v30.4h, v1.h[1]
            umlal2      v15.4s, v30.8h, v1.h[1]
    108:    umlal2      v14.4s, v21.8h, v1.h[0]
            umlal       v15.4s, v22.4h, v1.h[0]
            umlal2      v14.4s, v29.8h, v1.h[0]
            umlal       v15.4s, v30.4h, v1.h[0]
    107:    umlal       v14.4s, v22.4h, v0.h[7]
            umlal2      v15.4s, v22.8h, v0.h[7]
            umlal       v14.4s, v29.4h, v0.h[7]
            umlal2      v15.4s, v29.8h, v0.h[7]
    106:    umlal2      v14.4s, v22.8h, v0.h[6]
            umlal       v15.4s, v23.4h, v0.h[6]
            umlal2      v14.4s, v28.8h, v0.h[6]
            umlal       v15.4s, v29.4h, v0.h[6]
    105:    umlal       v14.4s, v23.4h, v0.h[5]
            umlal2      v15.4s, v23.8h, v0.h[5]
            umlal       v14.4s, v28.4h, v0.h[5]
            umlal2      v15.4s, v28.8h, v0.h[5]
    104:    umlal2      v14.4s, v23.8h, v0.h[4]
            umlal       v15.4s, v24.4h, v0.h[4]
            umlal2      v14.4s, v27.8h, v0.h[4]
            umlal       v15.4s, v28.4h, v0.h[4]
    103:    umlal       v14.4s, v24.4h, v0.h[3]
            umlal2      v15.4s, v24.8h, v0.h[3]
            umlal       v14.4s, v27.4h, v0.h[3]
            umlal2      v15.4s, v27.8h, v0.h[3]
    102:    umlal2      v14.4s, v24.8h, v0.h[2]
            umlal       v15.4s, v25.4h, v0.h[2]
            umlal2      v14.4s, v26.8h, v0.h[2]
            umlal       v15.4s, v27.4h, v0.h[2]
    101:    umlal       v14.4s, v25.4h, v0.h[1]
            umlal2      v15.4s, v25.8h, v0.h[1]
            umlal       v14.4s, v26.4h, v0.h[1]
            umlal2      v15.4s, v26.8h, v0.h[1]

            /* Narrow 32 -> 16 -> 8 bits with rounding and saturation. */
            uqrshrn     v14.4h, v14.4s, #16
            uqrshrn2    v14.8h, v15.4s, #16
            uqrshrn     v15.8b, v14.8h, #FRACTION_BITS

            /* Spill the oldest register to the buffer at x9, then slide the
             * whole window along by one register.
             */
            st1         {v17.16b}, [x9], #16
            bic         x9, x9, #0x40
            mov         v17.16b, v18.16b
            mov         v18.16b, v19.16b
            mov         v19.16b, v20.16b
            mov         v20.16b, v21.16b
            mov         v21.16b, v22.16b
            mov         v22.16b, v23.16b
            mov         v23.16b, v24.16b
            mov         v24.16b, v25.16b
            mov         v25.16b, v26.16b
            mov         v26.16b, v27.16b
            mov         v27.16b, v28.16b
            mov         v28.16b, v29.16b
            mov         v29.16b, v30.16b
            mov         v30.16b, v31.16b
            mov         v31.16b, v4.16b
            mov         v4.16b, v5.16b
            mov         v5.16b, v6.16b
            mov         v6.16b, v7.16b
            mov         v7.16b, v8.16b
            mov         v8.16b, v9.16b
            mov         v9.16b, v10.16b
            mov         v10.16b, v11.16b
.endm/*}}}*/

/* Dedicated function wrapper for the fetch macro, for the cases where
 * performance isn't that important, to keep code size down.
 *
 * x10 and x11 are saved on the stack before the fetch expansion and restored
 * after it, so callers see them unchanged.  Everything else the fetch macro
 * reads or writes is passed straight through.
 */
PRIVATE(fetch_generic_asm)
            stp         x10, x11, [sp, #-16]!
            fetch
            ldp         x10, x11, [sp], #16
            ret
END(fetch_generic_asm)

/* Given values in v10 and v11, and an index in x11, sweep the (x11&15)th value
 * across to fill the rest of the register pair.  Used for filling the right
 * hand edge of the window when starting too close to the right hand edge of
 * the image.
 * Also returns a dup-ed copy of the last element in v12 for the tail-fill
 * case (this happens incidentally in common path, but must be done
 * deliberately in the fast-out path).
*/
PRIVATE(prefetch_clampright1)
            ands        x12, x11, #15
            beq         1f                      // index 0: v10/v11 stay as they are
            sub         x12, x12, #1            // lane holding the last valid value
            /* Spill the pair to a scratch area, replicate the chosen 16-bit
             * lane over itself and the 30 bytes after it, then reload.  The
             * highest byte written is offset 59 (lane 14 at offset 28, plus
             * 32 bytes), which is why 64 bytes are reserved for a 32-byte
             * pair.
             */
            sub         sp, sp, #64
            st1         {v10.8h,v11.8h}, [sp]
            add         x12, sp, x12, LSL #1
            ld1r        {v12.8h}, [x12]
            st1         {v12.8h}, [x12], #16
            st1         {v12.8h}, [x12]
            ld1         {v10.8h,v11.8h}, [sp]
            add         sp, sp, #64
            ret
1:          dup         v12.8h, v11.h[7]        // fast-out: only produce the pad value
            ret
END(prefetch_clampright1)

/* Same operation as prefetch_clampright1, but the unit that is swept across
 * is a 64-bit group of four 16-bit lanes, starting four lanes before the
 * index.
 * NOTE(review): a non-zero (x11 & 15) below 4 would address memory below the
 * scratch area; presumably the index is always a multiple of 4 on this path
 * -- confirm against the callers.
 */
PRIVATE(prefetch_clampright4)
            ands        x12, x11, #15
            beq         1f
            sub         x12, x12, #4            // first lane of the last valid group
            sub         sp, sp, #64
            st1         {v10.8h,v11.8h}, [sp]
            add         x12, sp, x12, LSL #1
            ld1r        {v12.2d}, [x12]
            st1         {v12.8h}, [x12], #16
            st1         {v12.8h}, [x12]
            ld1         {v10.8h,v11.8h}, [sp]
            add         sp, sp, #64
            ret
1:          dup         v12.2d, v11.d[1]        // fast-out: only produce the pad value
            ret
END(prefetch_clampright4)


/* Helpers for prefetch, below.
 */

/* prefetch_out: place one register pair of window data at its destination.
 *
 *      qa, qb   -- destination registers
 *      store    -- 2: write both source registers to memory at x9, advancing
 *                     x9 by 32 (two single-register stores are used when the
 *                     two sources are the same register)
 *                  1: clear bit 6 of x9, write qsa to memory at x9 advancing
 *                     it by 16, and copy qsb into qb; qa is not used
 *                  0: copy qsa into qa and qsb into qb
 *      qsa, qsb -- source registers
 *      qsb_hi   -- accepted but not referenced by this macro
 *
 * Any other value of store emits nothing.
 */
.macro prefetch_out qa, qb, store, qsa, qsb, qsb_hi
  .if \store == 2
    .ifc \qsa,\qsb
            st1         {\qsa}, [x9], #16
            st1         {\qsb}, [x9], #16
    .else
            st1         {\qsa,\qsb}, [x9], #32
    .endif
  .elseif \store == 1
            bic         x9, x9, #0x40
            st1         {\qsa}, [x9], #16
            mov         \qb, \qsb
  .elseif \store == 0
            mov         \qa, \qsa
            mov         \qb, \qsb
  .endif
.endm

/* prefetch_one: fill one 16-element slot of the window, where the slot
 * starts at index i = (need - 16) - rem.  Nothing is emitted when i is
 * negative.  The symbol need must already be set by the caller.
 *
 * Each expansion has four entry points, and every forward reference (1f, 2f,
 * 4f) resolves to the same-numbered entry of the next expansion that emits
 * code:
 *   1 -- left padding: while x10 >= i+16 the slot is filled from v9
 *   2 -- real data: while x11 > i+16 the slot is filled from v10/v11 and the
 *        next pair is fetched with fetch_generic_asm
 *   3 -- the slot that reaches the stop index: v10/v11 are clamped with
 *        prefetch_clampright<step> and then used
 *   4 -- right padding: the slot is filled from v12
 * Entry 4 begins with a branch to 4f+4, which skips the matching branch in
 * the next expansion and lands on its padding code.  So the padding code of
 * an expansion runs only when reached from the expansion before it.  The
 * last expansion (rem == 0) defines all four labels at the common exit; the
 * nop there gives 4f+4 an instruction to step over.
 *
 *      qa, qb -- destination registers, passed on to prefetch_out
 *      rem    -- distance of this slot from the final one, in elements
 *      c      -- accepted but not referenced by this macro
 *      store  -- passed on to prefetch_out
 *      step   -- selects prefetch_clampright1 or prefetch_clampright4
 *
 * The bl instructions overwrite x30.
 */
.macro prefetch_one  qa, qb, rem, c, store=0, step=1
.set i, (need - 16) - \rem
.if i >= 0
1:          cmp         x10, #i+16
            blo         2f
            prefetch_out \qa, \qb, \store, v9.16b, v9.16b, v9.d[1]
            b           1f
2:          cmp         x11, #i+16
            bls         3f
            prefetch_out \qa, \qb, \store, v10.16b, v11.16b, v11.d[1]
            bl          fetch_generic_asm
            b           2f
3:          bl          prefetch_clampright\step
            prefetch_out \qa, \qb, \store, v10.16b, v11.16b, v11.d[1]
4:          b           4f+4
            //v12 contains pad word from prefetch_clampright call
            prefetch_out \qa, \qb, \store, v12.16b, v12.16b, v12.d[1]
  .if \rem > 0
            b           4f+4
  .else
1:
2:
3:
4:          nop
  .endif
.endif
.endm

/* Fill the convolution window with context data.  The aim here is to load
 * exactly rlf + rrt columns, and in the main loop to read as many columns as
 * will be written.  This is complicated by the need to handle cases when the
 * input starts very close to the left or right (or both) edges of the image,
 * and where these do not fall on 16-byte boundaries.
 *
 * Input:
 *      x1 -- src
 *      x2 -- pitch
 *      x3 -- count
 *      x4 -- inlen
 *      x5 -- r
 *      x6 -- rup
 *      x7 -- rdn
 *      x8 -- rlf
 *      x9 -- buffer (if needed)
 *      x13 = -pitch
 *      x15 = top-row in
 *      x19 = bottom-row in
 * Output:
 *      x1 += rlf +
min(count, rrt)
 * Modifies:
 *      x10 -- fill start index in the window
 *      x11 -- fill stop index in the window
 *      x12 -- scratch
 */
.macro prefetch step=1, max_r=25
/* need = window size in bytes (2 * max_r * step) rounded up to a multiple
 * of 16.
 */
.set need, ((\max_r + \max_r) * \step + 15) & ~15
    /* x10 = need - (max_r + x8) * step.  Any step other than 1 scales x8 by
     * four (LSL #2), so only step values of 1 and 4 are handled here.
     */
  .if \step == 1
            mov         x10, #need - (\max_r * \step)
            sub         x10, x10, x8
  .else
            mov         x10, #need - (\max_r * \step)
            sub         x10, x10, x8, LSL #2
  .endif
    /* x11 = min(x10 + x4, need), using an unsigned comparison. */
            add         x11, x10, x4
            subs        x11, x11, #need
            csel        x11, xzr, x11, hi
            add         x11, x11, #need

    /* NOTE(review): fetch_generic_asm is defined outside this section; it is
     * presumed to leave the first 16 columns in v10/v11 -- confirm there.
     * The call overwrites x30.
     */
            bl          fetch_generic_asm
    /* v9 = first element of v10 replicated across the whole register. */
  .if \step == 1
            dup         v9.8h, v10.h[0]
  .else
            dup         v9.2d, v10.d[0]
  .endif
    /* If the fill start index is not a multiple of 16, lay out
     * v9, v9, v10, v11 contiguously on the stack and reload v10/v11 from
     * (x10 & 15) halfwords below the stored v10, so the data moves up by that
     * many lanes and the vacated low lanes are taken from v9.
     */
            ands        x12, x10, #15
            beq         2f
            sub         sp, sp, #32
            st1         {v10.8h,v11.8h}, [sp]
            sub         x12, sp, x12, LSL #1
            sub         sp, sp, #16
            st1         {v9.8h}, [sp]
            sub         sp, sp, #16
            st1         {v9.8h}, [sp]
            ld1         {v10.8h,v11.8h}, [x12]
            add         sp, sp, #64
    /* Net effect of the next seven instructions: the three source pointers
     * step back by (x10 & 15) and x10 is rounded down to a multiple of 16.
     */
            sub         x1, x1, x10
            sub         x15, x15, x10
            sub         x19, x19, x10
            bic         x10, x10, #15
            add         x1, x1, x10
            add         x15, x15, x10
            add         x19, x19, x10
2:
  .if \step > 1
            /* it's only in the uchar2 and uchar4 cases where the register file
             * is insufficient (given MAX_R <= 25).
             */
            prefetch_one xx, xx, 192, c=\max_r, step=\step, store=2
            prefetch_one xx, xx, 176, c=\max_r, step=\step, store=2
            prefetch_one xx, v17.16b, 160, c=\max_r, step=\step, store=1
            prefetch_one v18.16b, v19.16b, 144, c=\max_r, step=\step, store=0
            prefetch_one v20.16b, v21.16b, 128, c=\max_r, step=\step, store=0
            prefetch_one v22.16b, v23.16b, 112, c=\max_r, step=\step, store=0
            prefetch_one v24.16b, v25.16b,  96, c=\max_r, step=\step, store=0
            prefetch_one v26.16b, v27.16b,  80, c=\max_r, step=\step, store=0
            prefetch_one v28.16b, v29.16b,  64, c=\max_r, step=\step, store=0
  .endif
            prefetch_one v30.16b, v31.16b,  48, c=\max_r, step=\step, store=0
            prefetch_one v4.16b,  v5.16b,   32, c=\max_r, step=\step, store=0
            prefetch_one v6.16b,  v7.16b,   16, c=\max_r, step=\step, store=0
            prefetch_one v8.16b,  v9.16b,    0, c=\max_r, step=\step, store=0

    /* x10 = (x8 + max_r) * step, then x4 = max(x4 - x10, 0) (unsigned). */
  .if \step == 1
            add         x10, x8, #\max_r * \step
  .else
            lsl         x10, x8, #2
            add         x10, x10, #\max_r * \step
  .endif
            subs        x4, x4, x10
            csel        x4, xzr, x4, lo
.endm

/* The main loop.
 *
 * Input:
 *      x0 = dst
 *      x1 = src
 *      x2 = pitch
 *      x3 = count
 *      x4 = inlen
 *      x5 = r
 *      x6 = rup
 *      x7 = rdn
 *      x9 = buffer
 *      x13 = -pitch
 *      x15 = top-row in
 *      x19 = bottom-row in
 * Modifies
 *      x8 = fetch code pointer
 *
 * Also written by the code below: x0 (advanced past every byte stored),
 * x1, x15, x19, x3, x4, x30 (by the bl), v10-v12 and the flags.
 */
.macro mainloop core, step=1, max_r=25, labelc="", labelnc=""
    /* Short-cut entry: x8 = labelnc - 40 * r (32 * r + 8 * r). */
            adrp        x8, \labelnc
            add         x8, x8, #:lo12:\labelnc
            sub         x8, x8, x5, LSL #5
            sub         x8, x8, x5, LSL #3
    /* ccmp only compares r with rdn when r == rup, and otherwise clears the
     * flags, so the branch is taken only when r == rup and r == rdn.
     */
            cmp         x5, x6
            ccmp        x5, x7, #0, eq
            beq         5f

            /* if (r != rup || r != rdn) then the address-clamping table should
             * be used rather than the short-cut version.
             */
    /* Clamping entry: x8 = labelc - 56 * r (64 * r - 8 * r). */
            adrp        x8, \labelc
            add         x8, x8, #:lo12:\labelc
            sub         x8, x8, x5, LSL #6
            add         x8, x8, x5, LSL #3
            b           5f
            .align 4
3:          fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=x8

            /* For each call to fetch two are made to \core.  It would be
             * preferable to have twice the work done in \core.
             */
            \core
            st1         {v15.8b}, [x0], #8
            \core
            st1         {v15.8b}, [x0], #8

            sub         x3, x3, #16
    /* Loop while at least 16 units of x4 remain; each pass stores 16 bytes. */
5:          subs        x4, x4, #16
            bhs         3b
            adds        x4, x4, #16
            bne         1f
    /* x4 == 0: fill v10/v11 with the last element of v9. */
  .if \step==1
            dup         v10.8h, v9.h[7]
            dup         v11.8h, v9.h[7]
  .else
            dup         v10.2d, v9.d[1]
            dup         v11.2d, v9.d[1]
  .endif
            b           4f

    /* 1 <= x4 <= 15: step the source pointers back by (16 - x4) and fetch a
     * full 16 columns ending at the last available one.
     */
1:          sub         x1, x1, #16
            sub         x15, x15, #16
            sub         x19, x19, #16
            add         x1, x1, x4
            add         x15, x15, x4
            add         x19, x19, x4
            bl          fetch_generic_asm

    /* v12 = last element of v11 replicated, used as fill for the shifts. */
  .if \step==1
            dup         v12.8h, v11.h[7]
  .else
            dup         v12.2d, v11.d[1]
  .endif
    /* After negation the low four bits of x4 equal 16 - x4.  Shift the
     * v10:v11:v12 sequence towards lane 0 by that many halfwords, one binary
     * digit (8, 4, 2, 1) at a time.
     */
            sub         x4, xzr, x4
            tbz         x4, #3, 1f
            mov         v10.16b, v11.16b
            mov         v11.16b, v12.16b
1:          tbz         x4, #2, 1f
            ext         v10.16b, v10.16b, v11.16b, #4*2
            ext         v11.16b, v11.16b, v12.16b, #4*2
1:          tbz         x4, #1, 1f
            ext         v10.16b, v10.16b, v11.16b, #2*2
            ext         v11.16b, v11.16b, v12.16b, #2*2
1:          tbz         x4, #0, 4f
            ext         v10.16b, v10.16b, v11.16b, #1*2
            ext         v11.16b, v11.16b, v12.16b, #1*2
    /* Tail: one core call per 8 output bytes until x3 is used up. */
4:          cbz         x3, 5f
3:          \core
  .if \step==1
            dup         v11.8h, v11.h[7]
  .else
            dup         v11.2d, v11.d[1]
  .endif
    /* The flags set by subs survive the st1 and still drive the beq. */
            subs        x3, x3, #8
            blo         4f
            st1         {v15.8b}, [x0], #8
            beq         5f
            b           3b
    /* Fewer than 8 bytes left: subtracting 8 left bits 2..0 of x3 intact, so
     * they select a 4-, 2- and 1-byte store.  Each ext rotates the next
     * bytes of v15 down to lane 0.
     */
4:          tbz         x3, #2, 1f
            st1         {v15.s}[0], [x0], #4
            ext         v15.8b, v15.8b, v15.8b, #4
1:          tbz         x3, #1, 1f
            st1         {v15.h}[0], [x0], #2
            ext         v15.8b, v15.8b, v15.8b, #2
1:          tbz         x3, #0, 5f
            st1         {v15.b}[0], [x0], #1
            ext         v15.8b, v15.8b, v15.8b, #1
5:          nop
.endm

/* One single-channel worker per radius in TUNED_LIST1, plus one for 25.
 * Register inputs are those listed for mainloop, plus x8 as read by
 * prefetch.  x29/x30 are saved because the macros overwrite x30 with bl.
 */
.irep r, TUNED_LIST1, 25
PRIVATE(convolve1_\r)
            stp         x29,x30, [sp, #-16]!

            prefetch    step=1, max_r=\r

            mainloop    core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r

            ldp         x29,x30, [sp], #16
            ret
END(convolve1_\r)
.endr

/* One four-channel worker per radius in TUNED_LIST4, plus one for 25.
 * x12 = entry sp - 0x40 is saved next to x30 so that the epilogue can
 * restore the entry sp with x12 + 0x40.
 */
.irep r, TUNED_LIST4, 25
PRIVATE(convolve4_\r)
            sub         x12, sp, #0x040
            bic         x9, x12, #0x07f
            mov         sp, x9
            stp         x12,x30, [sp, #-16]!

            /* x9 now points to a buffer on the stack whose address has the low
             * 7 bits clear.  This allows easy address calculation in the
             * wrap-around cases.
             */


            prefetch    step=4, max_r=\r

            mainloop    core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r

            ldp         x12,x30, [sp]
            add         sp, x12, #0x40
            ret
END(convolve4_\r)
.endr

/* void rsdIntrinsicBlurU1_K(
 *                  void *out,      // x0
 *                  void *in,       // x1
 *                  size_t w,       // x2
 *                  size_t h,       // x3
 *                  size_t p,       // x4
 *                  size_t x,       // x5
 *                  size_t y,       // x6
 *                  size_t count,   // x7
 *                  size_t r,       // [sp]
 *                  uint16_t *tab); // [sp,#8]
 *
 * Saves x19, x30 and the low halves of v8-v15, converts the C arguments
 * into the register layout listed for mainloop, then dispatches to the
 * first convolve1_<n> with r <= n (convolve1_25 when none matches).
 */
ENTRY(rsdIntrinsicBlurU1_K)
            stp         x19,x30, [sp, #-16]!
            sub         x8, sp, #32
            sub         sp, sp, #64
            st1         {v8.1d - v11.1d}, [sp]
            st1         {v12.1d - v15.1d}, [x8]
            mov         x8, x5          // x
            ldr         w5, [sp,#80]    // r (80 = 16 + 64 bytes pushed above)
            sub         x9, x2, x8      // w - x
            sub         x10, x3, x6     // h - y
            mov         x2, x4          // pitch
            mov         x3, x7          // count
            sub         x7, x10, #1     // h - y - 1
            sub         x9, x9, x3      // w - x - count

            ldr         x12, [sp, #88] // tab

            add         x1, x1, x8      // in + x

            // Clamp each margin to r; hs makes these unsigned minimums.
            cmp         x6, x5
            csel        x6, x5, x6, hs  // x6 = min(y, r)
            cmp         x7, x5
            csel        x7, x5, x7, hs  // x7 = min(h - y - 1, r)
            cmp         x8, x5
            csel        x8, x5, x8, hs  // x8 = min(x, r)
            cmp         x9, x5
            csel        x9, x5, x9, hs  // x9 = min(w - x - count, r)

            add         x4, x8, x9
            add         x4, x4, x3      // inlen = x8 + x9 + count

            sub         x1, x1, x8      // src = in + x - x8

            sub         x13, xzr, x2    // -pitch
            msub        x15, x2, x6, x1 // src - pitch * x6
            madd        x19, x2, x7, x1 // src + pitch * x7

            // v0-v3 = the first 32 halfwords of tab.
            ld1         {v0.8h,v1.8h}, [x12], #32
            ld1         {v2.8h,v3.8h}, [x12], #32

            // The workers are entered with plain branches, so point x30 at
            // the epilogue below for their ret.
            adr         x30, 1f
  .irep r, TUNED_LIST1
            cmp         x5, #\r
            bls         convolve1_\r
  .endr
            b           convolve1_25

1:          ld1         {v8.1d - v11.1d}, [sp], #32
            ld1         {v12.1d - v15.1d}, [sp], #32
            ldp         x19,x30, [sp], #16
            ret
END(rsdIntrinsicBlurU1_K)

/* void rsdIntrinsicBlurU4_K(
 *                  void *out,      // x0
 *                  void *in,       // x1
 *                  size_t w,       // x2
 *                  size_t h,       // x3
 *                  size_t p,       // x4
 *                  size_t x,       // x5
 *                  size_t y,       // x6
 *                  size_t count,   // x7
 *                  size_t r,       // [sp]
 *                  uint16_t *tab); // [sp,#8]
 *
 * Four-bytes-per-pixel counterpart of rsdIntrinsicBlurU1_K: x and the
 * horizontal margins are scaled by four when applied to src and inlen, and
 * count is converted to bytes before dispatching to convolve4_<n>.
 */
ENTRY(rsdIntrinsicBlurU4_K)
            stp         x19,x30, [sp, #-16]!
            sub         x8, sp, #32
            sub         sp, sp, #64
            st1         {v8.1d - v11.1d}, [sp]
            st1         {v12.1d - v15.1d}, [x8]
            mov         x8, x5          // x
            ldr         w5, [sp,#80]    // r (80 = 16 + 64 bytes pushed above)
            sub         x9, x2, x8      // w - x
            sub         x10, x3, x6     // h - y
            mov         x2, x4          // pitch
            mov         x3, x7          // count
            sub         x7, x10, #1     // h - y - 1
            sub         x9, x9, x3      // w - x - count

            ldr         x12, [sp, #88] // tab

            add         x1, x1, x8, LSL #2      // in + 4 * x

            // Clamp each margin to r; hs makes these unsigned minimums.
            cmp         x6, x5
            csel        x6, x5, x6, hs  // x6 = min(y, r)
            cmp         x7, x5
            csel        x7, x5, x7, hs  // x7 = min(h - y - 1, r)
            cmp         x8, x5
            csel        x8, x5, x8, hs  // x8 = min(x, r)
            cmp         x9, x5
            csel        x9, x5, x9, hs  // x9 = min(w - x - count, r)

            lsl         x3, x3, #2              // count in bytes
            add         x4, x8, x9
            add         x4, x3, x4, LSL #2      // inlen = 4 * (x8 + x9 + count)

            sub         x1, x1, x8, LSL #2      // src = in + 4 * (x - x8)

            sub         x13, xzr, x2    // -pitch
            msub        x15, x2, x6, x1 // src - pitch * x6
            madd        x19, x2, x7, x1 // src + pitch * x7

            // v0-v3 = the first 32 halfwords of tab.
            ld1         {v0.8h,v1.8h}, [x12], #32
            ld1         {v2.8h,v3.8h}, [x12], #32

            // The workers are entered with plain branches, so point x30 at
            // the epilogue below for their ret.
            adr         x30, 1f
  .irep r, TUNED_LIST4
            cmp         x5, #\r
            bls         convolve4_\r
  .endr
            b           convolve4_25

1:          ld1         {v8.1d - v11.1d}, [sp], #32
            ld1         {v12.1d - v15.1d}, [sp], #32
            ldp         x19,x30, [sp], #16
            ret
END(rsdIntrinsicBlurU4_K)