/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Function framing helpers: ENTRY emits a global function label, PRIVATE a
 * file-local one, and END closes the unwind region and records the size.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
#define PRIVATE(f) .text; .align 4; .type f,#function; f: .fnstart
#define END(f) .fnend; .size f, .-f;

.eabi_attribute 25,1 @Tag_ABI_align8_preserved
.arm

/* Number of fractional bits to preserve in intermediate results.  The
 * intermediate storage is 16-bit, and we started with 8 bit data (the integer
 * part), so this should be between 0 and 8.
 */
.set FRACTION_BITS, 7

.set MAX_R, 25


/* A quick way of making a line of code conditional on some other condition.
 * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with
 * `ifcc`:
 */
.macro ifcc zzz:vararg
.if cc
            \zzz
.endif
.endm

/* Fetch 16 columns of bytes (regardless of image format), convolve these
 * vertically, and leave them in the register file.  If working near the top or
 * bottom of an image then clamp the addressing while loading the data in.
 *
 * The convolution is fully unrolled for windows up to max_r, with the
 * outermost edges calculated first.  This way it's possible to branch directly
 * into the relevant part of the code for an arbitrary convolution radius.  Two
 * variants of the loop are produced; one eliminates the clamping code for a
 * slight speed advantage.
 *
 * Where the macro is called with reg=x, the specified register is taken to
 * contain a pre-calculated pointer into one of the two loops.
 *
 * When reg is left as r12 the entry point is computed here instead: the
 * address of the end of the clamped loop minus r * 64.  Each tap of the
 * clamped loop is exactly 16 instructions (64 bytes), which is why that loop
 * carries a padding nop.
 *
 * Input:
 *      r1 -- src
 *      r2 -- pitch
 *      r5 -- r
 *      r6 -- rup
 *      r7 -- rdn
 *      r12 -- switch index
 *      q0-q3 -- coefficient table
 * Output:
 *      r1 += 16
 *      q10,q11 -- 16 convolved columns
 * Modifies:
 *      r10 = upper row pointer
 *      r11 = lower row pointer
 *      q12-q15 = temporary sums
 */
.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=r12 /*{{{*/
  /* cc selects whether the entry address is computed in this macro. */
  .ifc \reg,r12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif

            /* Centre row: 16 bytes, widened to 16 bits in q14,q15. */
            vld1.8      {d30,d31}, [r1]
            /* r10 = src - pitch * rup */
            mls         r10, r2, r6, r1

            vmovl.u8    q14, d30
            pld         [r1, #32]
            vmovl.u8    q15, d31
  /* Take the address of the end of the clamped loop (label 1 below), either
   * directly with adr or, for the larger unrolling, as a pc-relative offset
   * loaded from the literal at label 2.
   */
  .if \max_r < 16 // approximate
    ifcc    adr         \reg, 1f
  .else
    ifcc    ldr         \reg, 2f
1:  ifcc    add         \reg, \reg, pc
  .endif

            /* Centre tap: coefficient 0 applied to the centre row. */
            vmull.u16   q12, d28, d0[0]
    /* Step back r taps of 64 bytes each from the end of the clamped loop. */
    ifcc    sub         \reg, r5, LSL #6
            vmull.u16   q13, d29, d0[0]
            /* r11 = src + pitch * rdn */
            mla         r11, r2, r7, r1
            vmull.u16   q14, d30, d0[0]
            add         r1, r1, #16
            vmull.u16   q15, d31, d0[0]
            bx          \reg

     /* Literal for the ldr above: distance from the add at the first label 1
      * to the end of the clamped loop, less 8 for the pc read-ahead.
      */
     ifcc   .align 2
  2: ifcc   .word       1f-1b-8

  /* Emit the clamped loop (rowclamp=1) followed by the unclamped one. */
  .irp rowclamp, 1, 0
    .set cc, \rowclamp
    .align 4
    .irp dreg, 6, 5, 4, 3, 2, 1, 0 ; .irp lane, 3, 2, 1, 0
      /* i is the tap distance; its coefficient lives in lane i of d0-d6. */
      .set i, \dreg * 4 + \lane
      .if 0 < i && i <= \max_r
        .if \rowclamp
            vld1.8      {d20,d21}, [r10]
            vld1.8      {d22,d23}, [r11]
            cmp         r6, #i
        .else
            vld1.8      {d20,d21}, [r10], r2
            vld1.8      {d22,d23}, [r11]
            sub         r11, r11, r2
        .endif
            /* Pair matching halves of the upper and lower rows so that each
             * vaddl sums the two rows at distance i from the centre.
             */
            vswp        d21, d22
            pld         [r10, #32]
            vaddl.u8    q10, d20, d21
    /* Upper pointer only moves towards the centre once rup >= i. */
    ifcc    addhs       r10, r10, r2
            vaddl.u8    q11, d22, d23
    ifcc    cmp         r7, #i
            vmlal.u16   q12, d20, d\dreg[\lane]
            pld         [r11, #32]
            vmlal.u16   q13, d21, d\dreg[\lane]
    /* Lower pointer only moves towards the centre once rdn >= i. */
    ifcc    subhs       r11, r11, r2
            vmlal.u16   q14, d22, d\dreg[\lane]
    /* Pads the clamped tap to 16 instructions. */
    ifcc    nop
            vmlal.u16   q15, d23, d\dreg[\lane]
        .endif
    .endr ; .endr
    .if \rowclamp == 1
    1: \labelc :
            b           2f
    .else
    2: \labelnc :
    .endif
  .endr

            /* Narrow the 32-bit sums to 16 bits, keeping FRACTION_BITS of
             * fraction; results land in q10,q11.
             */
            vqrshrn.u32 d20, q12, #16 - FRACTION_BITS
            vqrshrn.u32 d21, q13, #16 - FRACTION_BITS
            vqrshrn.u32 d22, q14, #16 - FRACTION_BITS
            vqrshrn.u32 d23, q15, #16 - FRACTION_BITS
.endm /*}}}*/

/* Some portion of the convolution window (as much as will fit, and all of it
 * for the uchar1 cases) is kept in the register file to avoid unnecessary
 * memory accesses.  This forces the horizontal loops to be unrolled because
 * there's no indexed addressing into the register file.
 *
 * As in the fetch macro, the operations are ordered from outside to inside, so
 * that jumping into the middle of the block bypasses the unwanted window taps.
 *
 * There are several variants of the macro because of the fixed offsets of the
 * taps -- the wider the maximum radius the further the centre tap is from the
 * most recently fetched data.  This means that pre-filling the window requires
 * more data that won't be used and it means that rotating the window involves
 * more mov operations.
 *
 * When the buffer gets too big the buffer at [r9] is used.
 *
 * Each variant dispatches on r5 through a table of offsets relative to label
 * 100.  The ldr reads pc as the address of the bkpt word, so r5 == 1 selects
 * the first table entry; the add reads pc as the address of label 100.
 *
 * Input:
 *      q4-q11 -- convolution window
 *      r9 -- pointer to additional convolution window data
 * Output:
 *      r9 -- updated buffer pointer (if used)
 *      d31 -- result to be stored
 * Modifies:
 *      r12 -- temp buffer pointer
 *      q12-q13 -- temporaries for load and vext operations.
 *      q14-q15 -- intermediate sums
 */
#define TUNED_LIST1 8, 16
/* Radius up to 8: window in q8-q10 with the centre tap at q9. */
.macro hconv1_8/*{{{*/
            vmull.u16   q14, d18, d0[0]
            vmull.u16   q15, d19, d0[0]

            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
    108:    vmlal.u16   q14, d16, d2[0]
            vmlal.u16   q15, d17, d2[0]
            vmlal.u16   q14, d20, d2[0]
            vmlal.u16   q15, d21, d2[0]
    107:    vext.u16    q12, q8, q9, #1
            vext.u16    q13, q9, q10, #7
            vmlal.u16   q14, d24, d1[3]
            vmlal.u16   q15, d25, d1[3]
            vmlal.u16   q14, d26, d1[3]
            vmlal.u16   q15, d27, d1[3]
    106:    vext.u16    q12, q8, q9, #2
            vext.u16    q13, q9, q10, #6
            vmlal.u16   q14, d24, d1[2]
            vmlal.u16   q15, d25, d1[2]
            vmlal.u16   q14, d26, d1[2]
            vmlal.u16   q15, d27, d1[2]
    105:    vext.u16    q12, q8, q9, #3
            vext.u16    q13, q9, q10, #5
            vmlal.u16   q14, d24, d1[1]
            vmlal.u16   q15, d25, d1[1]
            vmlal.u16   q14, d26, d1[1]
            vmlal.u16   q15, d27, d1[1]
    104:    //vext.u16    q12, q8, q9, #4
            //vext.u16    q13, q9, q10, #4
            vmlal.u16   q14, d17, d1[0]
            vmlal.u16   q15, d18, d1[0]
            vmlal.u16   q14, d19, d1[0]
            vmlal.u16   q15, d20, d1[0]
    103:    vext.u16    q12, q8, q9, #5
            vext.u16    q13, q9, q10, #3
            vmlal.u16   q14, d24, d0[3]
            vmlal.u16   q15, d25, d0[3]
            vmlal.u16   q14, d26, d0[3]
            vmlal.u16   q15, d27, d0[3]
    102:    vext.u16    q12, q8, q9, #6
            vext.u16    q13, q9, q10, #2
            vmlal.u16   q14, d24, d0[2]
            vmlal.u16   q15, d25, d0[2]
            vmlal.u16   q14, d26, d0[2]
            vmlal.u16   q15, d27, d0[2]
    101:    vext.u16    q12, q8, q9, #7
            vext.u16    q13, q9, q10, #1
            vmlal.u16   q14, d24, d0[1]
            vmlal.u16   q15, d25, d0[1]
            vmlal.u16   q14, d26, d0[1]
            vmlal.u16   q15, d27, d0[1]

            /* 32-bit sums -> 16 bits -> eight result bytes in d31. */
            vqrshrn.u32 d28, q14, #16
            vqrshrn.u32 d29, q15, #16
            vqrshrn.u16 d31, q14, #FRACTION_BITS

            /* Slide the window along: q8 <- q9 <- q10 <- q11. */
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/

/* Radius up to 16: window in q6-q10 with the centre tap at q8. */
.macro hconv1_16/*{{{*/
            vmull.u16   q14, d16, d0[0]
            vmull.u16   q15, d17, d0[0]

            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            .word 109f-100b
            .word 110f-100b
            .word 111f-100b
            .word 112f-100b
            .word 113f-100b
            .word 114f-100b
            .word 115f-100b
            .word 116f-100b
    116:    //vext.u16    q12, q6, q7, #0
            //vext.u16    q13, q10, q11, #0
            vmlal.u16   q14, d12, d4[0]
            vmlal.u16   q15, d13, d4[0]
            vmlal.u16   q14, d20, d4[0]
            vmlal.u16   q15, d21, d4[0]
    115:    vext.u16    q12, q6, q7, #1
            vext.u16    q13, q9, q10, #7
            vmlal.u16   q14, d24, d3[3]
            vmlal.u16   q15, d25, d3[3]
            vmlal.u16   q14, d26, d3[3]
            vmlal.u16   q15, d27, d3[3]
    114:    vext.u16    q12, q6, q7, #2
            vext.u16    q13, q9, q10, #6
            vmlal.u16   q14, d24, d3[2]
            vmlal.u16   q15, d25, d3[2]
            vmlal.u16   q14, d26, d3[2]
            vmlal.u16   q15, d27, d3[2]
    113:    vext.u16    q12, q6, q7, #3
            vext.u16    q13, q9, q10, #5
            vmlal.u16   q14, d24, d3[1]
            vmlal.u16   q15, d25, d3[1]
            vmlal.u16   q14, d26, d3[1]
            vmlal.u16   q15, d27, d3[1]
    112:    //vext.u16    q12, q6, q7, #4
            //vext.u16    q13, q9, q10, #4
            vmlal.u16   q14, d13, d3[0]
            vmlal.u16   q15, d14, d3[0]
            vmlal.u16   q14, d19, d3[0]
            vmlal.u16   q15, d20, d3[0]
    111:    vext.u16    q12, q6, q7, #5
            vext.u16    q13, q9, q10, #3
            vmlal.u16   q14, d24, d2[3]
            vmlal.u16   q15, d25, d2[3]
            vmlal.u16   q14, d26, d2[3]
            vmlal.u16   q15, d27, d2[3]
    110:    vext.u16    q12, q6, q7, #6
            vext.u16    q13, q9, q10, #2
            vmlal.u16   q14, d24, d2[2]
            vmlal.u16   q15, d25, d2[2]
            vmlal.u16   q14, d26, d2[2]
            vmlal.u16   q15, d27, d2[2]
    109:    vext.u16    q12, q6, q7, #7
            vext.u16    q13, q9, q10, #1
            vmlal.u16   q14, d24, d2[1]
            vmlal.u16   q15, d25, d2[1]
            vmlal.u16   q14, d26, d2[1]
            vmlal.u16   q15, d27, d2[1]
    108:    //vext.u16    q12, q7, q8, #0
            //vext.u16    q13, q9, q10, #0
            vmlal.u16   q14, d14, d2[0]
            vmlal.u16   q15, d15, d2[0]
            vmlal.u16   q14, d18, d2[0]
            vmlal.u16   q15, d19, d2[0]
    107:    vext.u16    q12, q7, q8, #1
            vext.u16    q13, q8, q9, #7
            vmlal.u16   q14, d24, d1[3]
            vmlal.u16   q15, d25, d1[3]
            vmlal.u16   q14, d26, d1[3]
            vmlal.u16   q15, d27, d1[3]
    106:    vext.u16    q12, q7, q8, #2
            vext.u16    q13, q8, q9, #6
            vmlal.u16   q14, d24, d1[2]
            vmlal.u16   q15, d25, d1[2]
            vmlal.u16   q14, d26, d1[2]
            vmlal.u16   q15, d27, d1[2]
    105:    vext.u16    q12, q7, q8, #3
            vext.u16    q13, q8, q9, #5
            vmlal.u16   q14, d24, d1[1]
            vmlal.u16   q15, d25, d1[1]
            vmlal.u16   q14, d26, d1[1]
            vmlal.u16   q15, d27, d1[1]
    104:    //vext.u16    q12, q7, q8, #4
            //vext.u16    q13, q8, q9, #4
            vmlal.u16   q14, d15, d1[0]
            vmlal.u16   q15, d16, d1[0]
            vmlal.u16   q14, d17, d1[0]
            vmlal.u16   q15, d18, d1[0]
    103:    vext.u16    q12, q7, q8, #5
            vext.u16    q13, q8, q9, #3
            vmlal.u16   q14, d24, d0[3]
            vmlal.u16   q15, d25, d0[3]
            vmlal.u16   q14, d26, d0[3]
            vmlal.u16   q15, d27, d0[3]
    102:    vext.u16    q12, q7, q8, #6
            vext.u16    q13, q8, q9, #2
            vmlal.u16   q14, d24, d0[2]
            vmlal.u16   q15, d25, d0[2]
            vmlal.u16   q14, d26, d0[2]
            vmlal.u16   q15, d27, d0[2]
    101:    vext.u16    q12, q7, q8, #7
            vext.u16    q13, q8, q9, #1
            vmlal.u16   q14, d24, d0[1]
            vmlal.u16   q15, d25, d0[1]
            vmlal.u16   q14, d26, d0[1]
            vmlal.u16   q15, d27, d0[1]

            /* 32-bit sums -> 16 bits -> eight result bytes in d31. */
            vqrshrn.u32 d28, q14, #16
            vqrshrn.u32 d29, q15, #16
            vqrshrn.u16 d31, q14, #FRACTION_BITS

            /* Slide the window along: q6 <- q7 <- q8 <- q9 <- q10 <- q11. */
            vmov        q6, q7
            vmov        q7, q8
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/

.macro hconv1_25/*{{{*/
            vext.u16    q12, q6, q7, #7
            vmull.u16   q14, d24, d0[0]
            vmull.u16   q15, d25, d0[0]

            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            .word 109f-100b
            .word 110f-100b
            .word 111f-100b
            .word 112f-100b
            .word 113f-100b
            .word 114f-100b
            .word 115f-100b
            .word 116f-100b
            .word 117f-100b
            .word 118f-100b
            .word 119f-100b
            .word 120f-100b
            .word 121f-100b
            .word 122f-100b
            .word 123f-100b
            .word 124f-100b
            .word 125f-100b
    125:    vext.u16    q12, q3, q4, #6
            vext.u16    q13, q10, q11, #0
            vmlal.u16   q14, d24, d6[1]
            vmlal.u16   q15, d25, d6[1]
            vmlal.u16   q14, d26, d6[1]
            vmlal.u16   q15, d27, d6[1]
    124:    vext.u16    q12, q3, q4, #7
            vext.u16    q13, q9, q10, #7
            vmlal.u16   q14, d24, d6[0]
            vmlal.u16   q15, d25, d6[0]
            vmlal.u16   q14, d26, d6[0]
            vmlal.u16   q15, d27, d6[0]
    123:    vext.u16    q12, q4, q5, #0
            vext.u16    q13, q9, q10, #6
            vmlal.u16   q14, d24, d5[3]
            vmlal.u16   q15, d25, d5[3]
            vmlal.u16   q14, d26, d5[3]
            vmlal.u16   q15, d27, d5[3]
    122:    vext.u16    q12, q4, q5, #1
            vext.u16    q13, q9, q10, #5
            vmlal.u16   q14, d24, d5[2]
            vmlal.u16   q15, d25, d5[2]
            vmlal.u16   q14, d26, d5[2]
            vmlal.u16   q15, d27, d5[2]
    121:    vext.u16    q12, q4, q5, #2
            vext.u16    q13, q9, q10, #4
            vmlal.u16   q14, d24, d5[1]
            vmlal.u16   q15, d25, d5[1]
            vmlal.u16   q14, d26, d5[1]
            vmlal.u16   q15, d27, d5[1]
    120:    vext.u16    q12, q4, q5, #3
            vext.u16    q13, q9, q10, #3
            vmlal.u16   q14, d24, d5[0]
            vmlal.u16   q15, d25, d5[0]
            vmlal.u16   q14, d26, d5[0]
            vmlal.u16   q15, d27, d5[0]
    119:    vext.u16    q12, q4, q5, #4
            vext.u16    q13, q9, q10, #2
            vmlal.u16   q14, d24, d4[3]
            vmlal.u16   q15, d25, d4[3]
            vmlal.u16   q14, d26, d4[3]
            vmlal.u16   q15, d27, d4[3]
    118:    vext.u16    q12, q4, q5, #5
            vext.u16    q13, q9, q10, #1
            vmlal.u16   q14, d24, d4[2]
            vmlal.u16   q15, d25, d4[2]
            vmlal.u16   q14, d26, d4[2]
            vmlal.u16   q15, d27, d4[2]
    117:    vext.u16    q12, q4, q5, #6
            vext.u16    q13, q9, q10, #0
            vmlal.u16   q14, d24, d4[1]
            vmlal.u16   q15, d25, d4[1]
            vmlal.u16   q14, d26, d4[1]
            vmlal.u16   q15, d27, d4[1]
    116:    vext.u16    q12, q4, q5, #7
            vext.u16    q13, q8, q9, #7
            vmlal.u16   q14, d24, d4[0]
            vmlal.u16   q15, d25, d4[0]
            vmlal.u16   q14, d26, d4[0]
            vmlal.u16   q15, d27, d4[0]
    115:    vext.u16    q12, q5, q6, #0
            vext.u16    q13, q8, q9, #6
            vmlal.u16   q14, d24, d3[3]
            vmlal.u16   q15, d25, d3[3]
447446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d4[3] 448446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d4[3] 449446788007efe0a673d0366284026adfa17b36fedSimon Hosie 118: vext.u16 q12, q4, q5, #5 450446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #1 451446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[2] 452446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[2] 453446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d4[2] 454446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d4[2] 455446788007efe0a673d0366284026adfa17b36fedSimon Hosie 117: vext.u16 q12, q4, q5, #6 456446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #0 457446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[1] 458446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[1] 459446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d4[1] 460446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d4[1] 461446788007efe0a673d0366284026adfa17b36fedSimon Hosie 116: vext.u16 q12, q4, q5, #7 462446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #7 463446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[0] 464446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[0] 465446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d4[0] 466446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d4[0] 467446788007efe0a673d0366284026adfa17b36fedSimon Hosie 115: vext.u16 q12, q5, q6, #0 468446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #6 469446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[3] 470446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[3] 
471446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[3] 472446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d3[3] 473446788007efe0a673d0366284026adfa17b36fedSimon Hosie 114: vext.u16 q12, q5, q6, #1 474446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #5 475446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[2] 476446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[2] 477446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[2] 478446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d3[2] 479446788007efe0a673d0366284026adfa17b36fedSimon Hosie 113: vext.u16 q12, q5, q6, #2 480446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #4 481446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[1] 482446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[1] 483446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[1] 484446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d3[1] 485446788007efe0a673d0366284026adfa17b36fedSimon Hosie 112: vext.u16 q12, q5, q6, #3 486446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #3 487446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[0] 488446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[0] 489446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[0] 490446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d3[0] 491446788007efe0a673d0366284026adfa17b36fedSimon Hosie 111: vext.u16 q12, q5, q6, #4 492446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #2 493446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[3] 494446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[3] 
495446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[3] 496446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[3] 497446788007efe0a673d0366284026adfa17b36fedSimon Hosie 110: vext.u16 q12, q5, q6, #5 498446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #1 499446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[2] 500446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[2] 501446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[2] 502446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[2] 503446788007efe0a673d0366284026adfa17b36fedSimon Hosie 109: vext.u16 q12, q5, q6, #6 504446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #0 505446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[1] 506446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[1] 507446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[1] 508446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[1] 509446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: vext.u16 q12, q5, q6, #7 510446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #7 511446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[0] 512446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[0] 513446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[0] 514446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[0] 515446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: vext.u16 q12, q6, q7, #0 516446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #6 517446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[3] 518446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[3] 
519446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[3] 520446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[3] 521446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: vext.u16 q12, q6, q7, #1 522446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #5 523446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[2] 524446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[2] 525446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[2] 526446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[2] 527446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: vext.u16 q12, q6, q7, #2 528446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #4 529446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[1] 530446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[1] 531446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[1] 532446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[1] 533446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: vext.u16 q12, q6, q7, #3 534446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #3 535446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[0] 536446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[0] 537446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[0] 538446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[0] 539446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: vext.u16 q12, q6, q7, #4 540446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #2 541446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[3] 542446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[3] 
543446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[3] 544446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[3] 545446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: vext.u16 q12, q6, q7, #5 546446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #1 547446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[2] 548446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[2] 549446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[2] 550446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[2] 551446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: vext.u16 q12, q6, q7, #6 552446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #0 553446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[1] 554446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[1] 555446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[1] 556446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[1] 557446788007efe0a673d0366284026adfa17b36fedSimon Hosie 558446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d28, q14, #16 559446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d29, q15, #16 560446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u16 d31, q14, #FRACTION_BITS 561446788007efe0a673d0366284026adfa17b36fedSimon Hosie 562446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov d7, d9 563446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q4, q5 564446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q5, q6 565446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q6, q7 566446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q7, q8 567446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q8, q9 568446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q9, q10 
569446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q10, q11 570446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 571446788007efe0a673d0366284026adfa17b36fedSimon Hosie 572446788007efe0a673d0366284026adfa17b36fedSimon Hosie#define TUNED_LIST4 6, 12 573446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_6/*{{{*/ 574446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q14, d14, d0[0] 575446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q15, d15, d0[0] 576446788007efe0a673d0366284026adfa17b36fedSimon Hosie 577446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r12, [pc, r5, LSL #2] 578446788007efe0a673d0366284026adfa17b36fedSimon Hosie add pc, pc, r12 579446788007efe0a673d0366284026adfa17b36fedSimon Hosie bkpt 580446788007efe0a673d0366284026adfa17b36fedSimon Hosie 100: .word 101f-100b 581446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 102f-100b 582446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 103f-100b 583446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 104f-100b 584446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 105f-100b 585446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 106f-100b 586446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: vmlal.u16 q14, d8, d1[2] 587446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d9, d1[2] 588446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d20, d1[2] 589446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d21, d1[2] 590446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: vmlal.u16 q14, d9, d1[1] 591446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d10, d1[1] 592446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d19, d1[1] 593446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d20, d1[1] 594446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: vmlal.u16 q14, d10, d1[0] 595446788007efe0a673d0366284026adfa17b36fedSimon 
Hosie vmlal.u16 q15, d11, d1[0] 596446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d18, d1[0] 597446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d19, d1[0] 598446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: vmlal.u16 q14, d11, d0[3] 599446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d12, d0[3] 600446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d17, d0[3] 601446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d18, d0[3] 602446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: vmlal.u16 q14, d12, d0[2] 603446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d13, d0[2] 604446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d16, d0[2] 605446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d17, d0[2] 606446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: vmlal.u16 q14, d13, d0[1] 607446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d14, d0[1] 608446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d15, d0[1] 609446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d16, d0[1] 610446788007efe0a673d0366284026adfa17b36fedSimon Hosie 611446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d28, q14, #16 612446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d29, q15, #16 613446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u16 d31, q14, #FRACTION_BITS 614446788007efe0a673d0366284026adfa17b36fedSimon Hosie 615446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q4, q5 616446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q5, q6 617446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q6, q7 618446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q7, q8 619446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q8, q9 620446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q9, q10 621446788007efe0a673d0366284026adfa17b36fedSimon 
Hosie vmov q10, q11 622446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 623446788007efe0a673d0366284026adfa17b36fedSimon Hosie 624446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_12/*{{{*/ 625446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q14, d8, d0[0] 626446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q15, d9, d0[0] 627446788007efe0a673d0366284026adfa17b36fedSimon Hosie 628446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r12, [pc, r5, LSL #2] 629446788007efe0a673d0366284026adfa17b36fedSimon Hosie add pc, pc, r12 630446788007efe0a673d0366284026adfa17b36fedSimon Hosie bkpt 631446788007efe0a673d0366284026adfa17b36fedSimon Hosie 100: .word 101f-100b 632446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 102f-100b 633446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 103f-100b 634446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 104f-100b 635446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 105f-100b 636446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 106f-100b 637446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 107f-100b 638446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 108f-100b 639446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 109f-100b 640446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 110f-100b 641446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 111f-100b 642446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 112f-100b 643446788007efe0a673d0366284026adfa17b36fedSimon Hosie 112: add r12, r9, #0x1a0 644446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 645446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 646446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[0] 647446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[0] 648446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d20, d3[0] 
649446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d21, d3[0] 650446788007efe0a673d0366284026adfa17b36fedSimon Hosie 111: add r12, r9, #0x1a8 651446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 652446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 653446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 654446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 655446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[3] 656446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[3] 657446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d19, d2[3] 658446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d20, d2[3] 659446788007efe0a673d0366284026adfa17b36fedSimon Hosie 110: add r12, r9, #0x1b0 660446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 661446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 662446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[2] 663446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[2] 664446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d18, d2[2] 665446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d19, d2[2] 666446788007efe0a673d0366284026adfa17b36fedSimon Hosie 109: add r12, r9, #0x1b8 667446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 668446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
669446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 670446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 671446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[1] 672446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[1] 673446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d17, d2[1] 674446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d18, d2[1] 675446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: add r12, r9, #0x1c0 676446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 677446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 678446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[0] 679446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[0] 680446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d16, d2[0] 681446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d17, d2[0] 682446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: add r12, r9, #0x1c8 683446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 684446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
685446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 686446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 687446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[3] 688446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[3] 689446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d15, d1[3] 690446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d16, d1[3] 691446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: add r12, r9, #0x1d0 692446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 693446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 694446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[2] 695446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[2] 696446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d14, d1[2] 697446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d15, d1[2] 698446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: add r12, r9, #0x1d8 699446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 700446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
701446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 702446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 703446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[1] 704446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[1] 705446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d13, d1[1] 706446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d14, d1[1] 707446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: add r12, r9, #0x1e0 708446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 709446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 710446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[0] 711446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[0] 712446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d12, d1[0] 713446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d13, d1[0] 714446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: add r12, r9, #0x1e8 715446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 716446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
717446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 718446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 719446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[3] 720446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[3] 721446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d11, d0[3] 722446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d12, d0[3] 723446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: add r12, r9, #0x1f0 724446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 725446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 726446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[2] 727446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[2] 728446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d10, d0[2] 729446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d11, d0[2] 730446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: add r12, r9, #0x1f8 731446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 732446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64] 733446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[1] 734446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d8, d0[1] 735446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d9, d0[1] 736446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d10, d0[1] 737446788007efe0a673d0366284026adfa17b36fedSimon Hosie 738446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d28, q14, #16 739446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d29, q15, #16 740446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u16 d31, q14, #FRACTION_BITS 741446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
742446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u8 {q4}, [r9:128]! 743446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r9, r9, #0x200 744446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q4, q5 745446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q5, q6 746446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q6, q7 747446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q7, q8 748446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q8, q9 749446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q9, q10 750446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q10, q11 751446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 752446788007efe0a673d0366284026adfa17b36fedSimon Hosie 753446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_25/*{{{*/ 754446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x198 755446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 756446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
757446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 758446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 759446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q14, d24, d0[0] 760446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q15, d25, d0[0] 761446788007efe0a673d0366284026adfa17b36fedSimon Hosie 762446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r12, [pc, r5, LSL #2] 763446788007efe0a673d0366284026adfa17b36fedSimon Hosie add pc, pc, r12 764446788007efe0a673d0366284026adfa17b36fedSimon Hosie bkpt 765446788007efe0a673d0366284026adfa17b36fedSimon Hosie 100: .word 101f-100b 766446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 102f-100b 767446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 103f-100b 768446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 104f-100b 769446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 105f-100b 770446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 106f-100b 771446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 107f-100b 772446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 108f-100b 773446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 109f-100b 774446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 110f-100b 775446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 111f-100b 776446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 112f-100b 777446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 113f-100b 778446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 114f-100b 779446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 115f-100b 780446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 116f-100b 781446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 117f-100b 782446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 118f-100b 783446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 119f-100b 784446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 
120f-100b 785446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 121f-100b 786446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 122f-100b 787446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 123f-100b 788446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 124f-100b 789446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 125f-100b 790446788007efe0a673d0366284026adfa17b36fedSimon Hosie 125: add r12, r9, #0x0d0 791446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 792446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 793446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d6[1] 794446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d6[1] 795446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d20, d6[1] 796446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d21, d6[1] 797446788007efe0a673d0366284026adfa17b36fedSimon Hosie 124: add r12, r9, #0x0d8 798446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 799446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
800446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 801446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 802446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d6[0] 803446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d6[0] 804446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d19, d6[0] 805446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d20, d6[0] 806446788007efe0a673d0366284026adfa17b36fedSimon Hosie 123: add r12, r9, #0x0e0 807446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 808446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 809446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[3] 810446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[3] 811446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d18, d5[3] 812446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d19, d5[3] 813446788007efe0a673d0366284026adfa17b36fedSimon Hosie 122: add r12, r9, #0x0e8 814446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 815446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
816446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 817446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 818446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[2] 819446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[2] 820446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d17, d5[2] 821446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d18, d5[2] 822446788007efe0a673d0366284026adfa17b36fedSimon Hosie 121: add r12, r9, #0x0f0 823446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 824446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 825446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[1] 826446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[1] 827446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d16, d5[1] 828446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d17, d5[1] 829446788007efe0a673d0366284026adfa17b36fedSimon Hosie 120: add r12, r9, #0x0f8 830446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 831446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
832446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 833446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 834446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[0] 835446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[0] 836446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d15, d5[0] 837446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d16, d5[0] 838446788007efe0a673d0366284026adfa17b36fedSimon Hosie 119: add r12, r9, #0x100 839446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 840446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 841446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[3] 842446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[3] 843446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d14, d4[3] 844446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d15, d4[3] 845446788007efe0a673d0366284026adfa17b36fedSimon Hosie 118: add r12, r9, #0x108 846446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 847446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
848446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 849446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 850446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[2] 851446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[2] 852446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d13, d4[2] 853446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d14, d4[2] 854446788007efe0a673d0366284026adfa17b36fedSimon Hosie 117: add r12, r9, #0x110 855446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 856446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 857446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[1] 858446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[1] 859446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d12, d4[1] 860446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d13, d4[1] 861446788007efe0a673d0366284026adfa17b36fedSimon Hosie 116: add r12, r9, #0x118 862446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 863446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
864446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 865446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 866446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[0] 867446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[0] 868446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d11, d4[0] 869446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d12, d4[0] 870446788007efe0a673d0366284026adfa17b36fedSimon Hosie 115: add r12, r9, #0x120 871446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 872446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 873446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[3] 874446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[3] 875446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d10, d3[3] 876446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d11, d3[3] 877446788007efe0a673d0366284026adfa17b36fedSimon Hosie 114: add r12, r9, #0x128 878446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 879446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
880446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 881446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 882446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[2] 883446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[2] 884446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d9, d3[2] 885446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d10, d3[2] 886446788007efe0a673d0366284026adfa17b36fedSimon Hosie 113: add r12, r9, #0x130 887446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 888446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 889446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[1] 890446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[1] 891446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d8, d3[1] 892446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d9, d3[1] 893446788007efe0a673d0366284026adfa17b36fedSimon Hosie 112: add r12, r9, #0x138 894446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 895446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 896446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 897446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 898446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1f8 899446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 900446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64] 901446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[0] 902446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[0] 903446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[0] @ Could be d7, without the load, right? 
904446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d8, d3[0] 905446788007efe0a673d0366284026adfa17b36fedSimon Hosie 111: add r12, r9, #0x140 906446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 907446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 908446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1f0 909446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 910446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 911446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[3] 912446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[3] 913446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[3] 914446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[3] 915446788007efe0a673d0366284026adfa17b36fedSimon Hosie 110: add r12, r9, #0x148 916446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 917446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 918446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 919446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 920446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1e8 921446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 922446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
923446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 924446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 925446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[2] 926446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[2] 927446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[2] 928446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[2] 929446788007efe0a673d0366284026adfa17b36fedSimon Hosie 109: add r12, r9, #0x150 930446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 931446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 932446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1e0 933446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 934446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 935446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[1] 936446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[1] 937446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[1] 938446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[1] 939446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: add r12, r9, #0x158 940446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 941446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 942446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 943446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 944446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1d8 945446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 946446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
947446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 948446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 949446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[0] 950446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[0] 951446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[0] 952446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[0] 953446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: add r12, r9, #0x160 954446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 955446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 956446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1d0 957446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 958446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 959446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[3] 960446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[3] 961446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[3] 962446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[3] 963446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: add r12, r9, #0x168 964446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 965446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 966446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 967446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 968446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1c8 969446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 970446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
971446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 972446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 973446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[2] 974446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[2] 975446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[2] 976446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[2] 977446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: add r12, r9, #0x170 978446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 979446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 980446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1c0 981446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 982446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 983446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[1] 984446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[1] 985446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[1] 986446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[1] 987446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: add r12, r9, #0x178 988446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 989446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 990446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 991446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 992446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1b8 993446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 994446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
995446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 996446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 997446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[0] 998446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[0] 999446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[0] 1000446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[0] 1001446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: add r12, r9, #0x180 1002446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1003446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 1004446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1b0 1005446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1006446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 1007446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[3] 1008446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[3] 1009446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[3] 1010446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[3] 1011446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: add r12, r9, #0x188 1012446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1013446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 1014446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1015446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 1016446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1a8 1017446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1018446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
1019446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1020446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 1021446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[2] 1022446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[2] 1023446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[2] 1024446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[2] 1025446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: add r12, r9, #0x190 1026446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1027446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128]! 1028446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1029446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 1030446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[1] 1031446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[1] 1032446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[1] 1033446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[1] 1034446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1035446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d28, q14, #16 1036446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d29, q15, #16 1037446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u16 d31, q14, #FRACTION_BITS 1038446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1039446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u8 {q4}, [r9:128]! 
1040446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r9, r9, #0x200 1041446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q4, q5 1042446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q5, q6 1043446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q6, q7 1044446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q7, q8 1045446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q8, q9 1046446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q9, q10 1047446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q10, q11 1048446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 1049446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1050446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Dedicated function wrapper for the fetch macro, for the cases where 1051446788007efe0a673d0366284026adfa17b36fedSimon Hosie * performance isn't that important, to keep code size down. 1052446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 10535eb463f2c28cf161fe31b8078908a7b01198516fSimon HosiePRIVATE(fetch_generic_asm) 1054446788007efe0a673d0366284026adfa17b36fedSimon Hosie push {r10,r11} 1055446788007efe0a673d0366284026adfa17b36fedSimon Hosie fetch 1056446788007efe0a673d0366284026adfa17b36fedSimon Hosie pop {r10,r11} 1057446788007efe0a673d0366284026adfa17b36fedSimon Hosie bx lr 1058446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(fetch_generic_asm) 1059446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1060446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Given values in q10 and q11, and an index in r11, sweep the (r11&15)th value 1061446788007efe0a673d0366284026adfa17b36fedSimon Hosie * across to fill the rest of the register pair. Used for filling the right 1062446788007efe0a673d0366284026adfa17b36fedSimon Hosie * hand edge of the window when starting too close to the right hand edge of 1063446788007efe0a673d0366284026adfa17b36fedSimon Hosie * the image. 
10645eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie * Also returns a dup-ed copy of the last element in q12 for the tail-fill 10655eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie * case (this happens incidentally in common path, but must be done 10665eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie * deliberately in the fast-out path). 1067446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 10685eb463f2c28cf161fe31b8078908a7b01198516fSimon HosiePRIVATE(prefetch_clampright1) 10695eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie ands r12, r11, #15 1070446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 1f 10715eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub r12, r12, #1 10725eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub sp, sp, #64 10735eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q10,q11}, [sp] 10745eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie add r12, sp, r12, LSL #1 10755eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vld1.u16 {d24[]}, [r12] 10765eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vld1.u16 {d25[]}, [r12] 10775eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q12}, [r12]! 
10785eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q12}, [r12] 10795eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vld1.u16 {q10,q11}, [sp] 10805eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie add sp, sp, #64 10815eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie bx lr 10825eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie1: vdup.u16 q12, d23[3] 1083446788007efe0a673d0366284026adfa17b36fedSimon Hosie bx lr 10845eb463f2c28cf161fe31b8078908a7b01198516fSimon HosieEND(prefetch_clampright1) 1085446788007efe0a673d0366284026adfa17b36fedSimon Hosie 10865eb463f2c28cf161fe31b8078908a7b01198516fSimon HosiePRIVATE(prefetch_clampright4) 10875eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie ands r12, r11, #15 1088446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 1f 10895eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub r12, r12, #4 10905eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub sp, sp, #64 10915eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q10,q11}, [sp] 10925eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie add r12, sp, r12, LSL #1 10935eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vld1.u64 {d24}, [r12] 10945eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vld1.u64 {d25}, [r12] 10955eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q12}, [r12]! 
10965eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q12}, [r12] 10975eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vld1.u16 {q10,q11}, [sp] 10985eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie add sp, sp, #64 10995eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie bx lr 11005eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie1: vmov.u16 d24, d23 11015eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vmov.u16 d25, d23 1102446788007efe0a673d0366284026adfa17b36fedSimon Hosie bx lr 11035eb463f2c28cf161fe31b8078908a7b01198516fSimon HosieEND(prefetch_clampright4) 1104446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1105446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1106446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Helpers for prefetch, below. 1107446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1108446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch_out qa, qb, store, qsa, qsb, qsb_hi 1109446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \store > 0 1110446788007efe0a673d0366284026adfa17b36fedSimon Hosie .ifc \qsa,\qsb 1111446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u16 {\qsa}, [r9:128]! 1112446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u16 {\qsb}, [r9:128]! 1113446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1114446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u16 {\qsa,\qsb}, [r9:256]! 
1115446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 1116446788007efe0a673d0366284026adfa17b36fedSimon Hosie .elseif \store == 0 1117446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u16 \qa, \qsa 1118446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u16 \qb, \qsb 1119446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1120446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u16 \qb, \qsb_hi 1121446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 1122446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm 1123446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1124446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch_one qa, qb, rem, c, store=0, step=1 1125446788007efe0a673d0366284026adfa17b36fedSimon Hosie.set i, (need - 16) - \rem 1126446788007efe0a673d0366284026adfa17b36fedSimon Hosie.if i >= 0 1127446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: cmp r10, #i+16 1128446788007efe0a673d0366284026adfa17b36fedSimon Hosie blo 2f 1129446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_out \qa, \qb, \store, q9, q9, d19 1130446788007efe0a673d0366284026adfa17b36fedSimon Hosie b 1f 1131446788007efe0a673d0366284026adfa17b36fedSimon Hosie2: cmp r11, #i+16 1132446788007efe0a673d0366284026adfa17b36fedSimon Hosie bls 3f 1133446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_out \qa, \qb, \store, q10, q11, d23 1134446788007efe0a673d0366284026adfa17b36fedSimon Hosie bl fetch_generic_asm 1135446788007efe0a673d0366284026adfa17b36fedSimon Hosie b 2f 11365eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie3: bl prefetch_clampright\step 1137446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_out \qa, \qb, \store, q10, q11, d23 1138446788007efe0a673d0366284026adfa17b36fedSimon Hosie4: b 4f+4 11395eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie @q12 contains pad word from prefetch_clampright call 1140446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_out \qa, \qb, \store, q12, q12, d25 
1141446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \rem > 0 1142446788007efe0a673d0366284026adfa17b36fedSimon Hosie b 4f+4 1143446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1144446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: 1145446788007efe0a673d0366284026adfa17b36fedSimon Hosie2: 1146446788007efe0a673d0366284026adfa17b36fedSimon Hosie3: 1147446788007efe0a673d0366284026adfa17b36fedSimon Hosie4: nop 1148446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 1149446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endif 1150446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm 1151446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1152446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Fill the convolution window with context data. The aim here is to load 1153446788007efe0a673d0366284026adfa17b36fedSimon Hosie * exactly rlf + rrt columns, and in the main loop to read as many columns as 1154446788007efe0a673d0366284026adfa17b36fedSimon Hosie * will be written. This is complicated by the need to handle cases when the 1155446788007efe0a673d0366284026adfa17b36fedSimon Hosie * input starts very close to the left or right (or both) edges of the image, 1156446788007efe0a673d0366284026adfa17b36fedSimon Hosie * and where these do not fall on 16-byte boundaries. 
1157446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 1158446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input: 1159446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r1 -- src 1160446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r2 -- pitch 1161446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r3 -- count 1162446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r4 -- inlen 1163446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r5 -- r 1164446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r6 -- rup 1165446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r7 -- rdn 1166446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r8 -- rlf 1167446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r9 -- buffer (if needed) 1168446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Output: 1169446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r1 += rlf + min(count, rrt) 1170446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies: 1171446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r10 -- fill start index in the window 1172446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r11 -- fill stop index in the window 1173446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r12 -- scratch 1174446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1175446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch step=1, max_r=25 1176446788007efe0a673d0366284026adfa17b36fedSimon Hosie.set need, ((\max_r + \max_r) * \step + 15) & ~15 1177446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \step == 1 1178446788007efe0a673d0366284026adfa17b36fedSimon Hosie rsb r10, r8, #need - (\max_r * \step) 1179446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1180446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov r10, r8, LSL #2 1181446788007efe0a673d0366284026adfa17b36fedSimon Hosie rsb r10, r10, #need - (\max_r * \step) 1182446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 
1183446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r11, r10, r4 1184446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r11, #need 1185446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r11, #need 1186446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1187446788007efe0a673d0366284026adfa17b36fedSimon Hosie bl fetch_generic_asm 1188446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \step == 1 1189446788007efe0a673d0366284026adfa17b36fedSimon Hosie vdup.u16 q9, d20[0] 1190446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1191446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u16 d18, d20 1192446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u16 d19, d20 1193446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 11945eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie ands r12, r10, #15 1195446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 2f 11965eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub sp, sp, #32 11975eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q10,q11}, [sp] 11985eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub r12, sp, r12, LSL #1 11995eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub sp, sp, #16 12005eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q9}, [sp] 12015eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub sp, sp, #16 12025eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vst1.u16 {q9}, [sp] 12035eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie vld1.u16 {q10,q11}, [r12] 12045eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie add sp, sp, #64 12055eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie sub r1, r1, r10 1206446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r10, r10, #15 1207446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r1, r1, r10 1208446788007efe0a673d0366284026adfa17b36fedSimon Hosie2: 1209446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \step > 1 1210446788007efe0a673d0366284026adfa17b36fedSimon Hosie /* 
it's only in the uchar2 and uchar4 cases where the register file 1211446788007efe0a673d0366284026adfa17b36fedSimon Hosie * is insufficient (given MAX_R <= 25). 1212446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1213446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 192, c=\max_r, step=\step, store=1 1214446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 176, c=\max_r, step=\step, store=1 1215446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 160, c=\max_r, step=\step, store=1 1216446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 144, c=\max_r, step=\step, store=1 1217446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 128, c=\max_r, step=\step, store=1 1218446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 112, c=\max_r, step=\step, store=1 1219446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 96, c=\max_r, step=\step, store=1 1220446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 80, c=\max_r, step=\step, store=1 1221446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 64, c=\max_r, step=\step, store=1 1222446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, xx, 48, c=\max_r, step=\step, store=1 1223446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1224446788007efe0a673d0366284026adfa17b36fedSimon Hosie /* q3 normally contains the coefficient table, but it's not fully 1225446788007efe0a673d0366284026adfa17b36fedSimon Hosie * used. In the uchar1, r=25 case the other half of q3 is used for 1226446788007efe0a673d0366284026adfa17b36fedSimon Hosie * the last two window taps to avoid falling out to memory. 
1227446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1228446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one xx, d7, 48, c=\max_r, step=\step, store=-1 1229446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 1230446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one q4, q5, 32, c=\max_r, step=\step, store=0 1231446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one q6, q7, 16, c=\max_r, step=\step, store=0 1232446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch_one q8, q9, 0, c=\max_r, step=\step, store=0 1233446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1234446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \step == 1 1235446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r10, r8, #\max_r * \step 1236446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1237446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov r10, r8, LSL #2 1238446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r10, r10, #\max_r * \step 1239446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 1240446788007efe0a673d0366284026adfa17b36fedSimon Hosie subs r4, r4, r10 1241446788007efe0a673d0366284026adfa17b36fedSimon Hosie movlo r4, #0 1242446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm 1243446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1244446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* The main loop. 
*/
/* mainloop -- body shared by every convolve1_R / convolve4_R routine below.
 *
 * Picks an entry point into the code emitted by the fetch macro, then
 * alternates fetch with two expansions of \core (each followed by an 8-byte
 * store of d31) for as long as 16 more units of input remain.  Afterwards it
 * handles a partial final fetch and a partial final store.
 *
 * Macro arguments:
 *      core    = name of the macro expanded to produce one 8-byte result in d31
 *      step    = 1 or 4; here it only selects how the last sample is
 *                replicated as padding (one 16-bit lane for step 1, a whole
 *                d register, i.e. four 16-bit lanes, for any other value)
 *      max_r   = forwarded to fetch
 *      labelc  = label used as the base of the address-clamping fetch code
 *      labelnc = label used as the base of the non-clamping fetch code
 *                (both labels are presumably defined inside the fetch
 *                expansion -- confirm against the fetch macro)
 *
 * Input:
 *      r0 = dst
 *      r1 = src
 *      r2 = pitch
 *      r3 = count
 *      r4 = inlen
 *      r5 = r
 *      r6 = rup
 *      r7 = rdn
 *      r9 = buffer
 * Modifies
 *      r8 = fetch code pointer
 *      The code visible here also writes r0, r1, r3, r4, lr (bl), q10-q12,
 *      d31 and the condition flags.  fetch, fetch_generic_asm and \core are
 *      defined elsewhere and may touch more.
 *
 * Numeric labels are reused on purpose (the macro is expanded many times);
 * every Nf/Nb reference binds to the nearest definition in that direction.
 */
.macro mainloop core, step=1, max_r=25, labelc="", labelnc=""
            /* r8 = \labelnc - 48 * r.  The table word holds the distance from
             * the value pc reads as at 1: (address of 1: plus 8 in ARM state)
             * to \labelnc, so the add yields the absolute address.
             */
            ldr         r8, 3f
1:          add         r8, r8, pc
            sub         r8, r5, LSL #5
            sub         r8, r5, LSL #4
            cmp         r5, r6
            cmpeq       r5, r7
            beq         5f

            /* if (r != rup || r != rdn) then the address-clamping table should
             * be used rather than the short-cut version.
             * Here r8 = \labelc - 64 * r.
             */
            ldr         r8, 3f+4
2:          add         r8, r8, pc
            sub         r8, r5, LSL #6
            b           5f
            /* Offset words read by the two ldr instructions above.  They are
             * never executed: the instruction before them is an unconditional
             * branch and the loop re-enters at the second 3: label.
             */
            .align 3
3:          .word       \labelnc-1b-8
            .word       \labelc-2b-8
            .align 4
3:          fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=r8

            /* For each call to fetch two are made to \core.  It would be
             * preferable to have twice the work done in \core, but the
             * register file is too small for this to be straightforward.
             */
            \core
            vst1.u8     {d31}, [r0]!
            \core
            vst1.u8     {d31}, [r0]!

            sub         r3, r3, #16
            /* Loop test (also the initial entry point): continue while at
             * least 16 units of input remain.
             */
5:          subs        r4, r4, #16
            bhs         3b
            adds        r4, r4, #16         /* undo the final subtraction */
            bne         1f

            /* No input left at all: fill q10/q11 with copies of the last
             * sample held in d19.
             */
  .if \step==1
            vdup.u16    q10, d19[3]
            vdup.u16    q11, d19[3]
  .else
            vmov.u64    d20, d19
            vmov.u64    d21, d19
            vmov.u64    d22, d19
            vmov.u64    d23, d19
  .endif
            b           4f

            /* 1..15 units left: move src back by (16 - r4) so that one more
             * full-width fetch ends exactly at the end of the input.
             */
1:          sub         r1, r1, #16
            add         r1, r1, r4
            bl          fetch_generic_asm

            /* q12 = copies of the last sample, used as padding below. */
  .if \step==1
            vdup.u16    q12, d23[3]
  .else
            vmov.u64    d24, d23
            vmov.u64    d25, d23
  .endif
            /* Shift q10:q11:q12 down by (-r4 & 15) 16-bit lanes, one binary
             * digit of the shift amount at a time (8, 4, 2, 1 lanes).
             */
            rsb         r4, r4, #0
            tst         r4, #8
            beq         1f
            vmov        q10, q11
            vmov        q11, q12
1:          tst         r4, #4
            beq         1f
            vext.u16    q10, q10, q11, #4
            vext.u16    q11, q11, q12, #4
1:          tst         r4, #2
            beq         1f
            vext.u16    q10, q10, q11, #2
            vext.u16    q11, q11, q12, #2
1:          tst         r4, #1
            beq         4f
            vext.u16    q10, q10, q11, #1
            vext.u16    q11, q11, q12, #1

            /* Drain: keep producing 8-byte results until count is used up. */
4:          cmp         r3, #0
            beq         5f
3:          \core
  .if \step==1
            vdup.u16    q11, d23[3]
  .else
            vmov.u64    d22, d23
  .endif
            subs        r3, r3, #8
            blo         4f                  /* fewer than 8 bytes wanted */
            vst1.u8     {d31}, [r0]!
            beq         5f                  /* flags still from the subs */
            b           3b

            /* Partial store: r3 is negative here but its low three bits still
             * hold the number of bytes wanted.  Write 4, 2 and 1 bytes as
             * selected by those bits, rotating d31 after each store so the
             * next piece sits in lane 0.
             */
4:          tst         r3, #4
            beq         1f
            vst1.u32    {d31[0]}, [r0]!
            vext.u8     d31, d31, d31, #4
1:          tst         r3, #2
            beq         1f
            vst1.u16    {d31[0]}, [r0]!
            vext.u8     d31, d31, d31, #2
1:          tst         r3, #1
            beq         5f
            vst1.u8     {d31[0]}, [r0]!
            vext.u8     d31, d31, d31, #1
5:          nop                             /* common exit point */
.endm

/* convolve1_R -- one private routine per radius in TUNED_LIST1, plus 25.
 *
 * Expects the register set-up done by rsdIntrinsicBlurU1_K (r8 = clamped
 * left margin, lr = return address).  src is moved back by r8 before the
 * window is primed by prefetch and processed by mainloop.  lr is saved
 * because mainloop contains a bl; r12 is presumably pushed alongside it to
 * keep the stack 8-byte aligned.
 */
.irep r, TUNED_LIST1, 25
PRIVATE(convolve1_\r)
            push        {r12,lr}

            sub         r1, r1, r8

            prefetch    step=1, max_r=\r

            mainloop    core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r

            pop         {r12,pc}
END(convolve1_\r)
.endr

/* convolve4_R -- one private routine per radius in TUNED_LIST4, plus 25.
 *
 * Same shape as convolve1_R for step=4, with a scratch buffer carved out of
 * the stack.  r12 remembers (entry sp - 0x200) so the epilogue can restore
 * sp exactly; the saved r12/lr pair lives just below the buffer.
 */
.irep r, TUNED_LIST4, 25
PRIVATE(convolve4_\r)
            sub         r12, sp, #0x200
            bic         r9, r12, #0x3fc     /* clears bits 2..9 */
            mov         sp, r9
            push        {r12,lr}

            /* r9 now points to a buffer on the stack whose address has the low
             * 10 bits clear.  This allows easy address calculation in the
             * wrap-around cases.  (Bits 0..1 are not cleared by the bic; they
             * are zero only because sp is at least word aligned.)
             */

            sub         r1, r1, r8, LSL #2  /* r8 counts 4-byte units here */

            prefetch    step=4, max_r=\r

            mainloop    core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r

            pop         {r12,lr}
            add         sp, r12, #0x200     /* back to the entry sp */
            bx          lr
END(convolve4_\r)
.endr

/* void rsdIntrinsicBlurU1_K(
 *                  void *out,      // r0
 *                  void *in,       // r1
 *                  size_t w,       // r2
 *                  size_t h,       // r3
 *                  size_t p,       // [sp]
 *                  size_t x,       // [sp,#4]
 *                  size_t y,       // [sp,#8]
 *                  size_t count,   // [sp,#12]
 *                  size_t r,       // [sp,#16]
 *                  uint16_t *tab); // [sp,#20]
 *
 * Entry point for the one-byte-per-unit case.  Saves the callee-saved core
 * and NEON registers, unpacks the stack arguments into the registers that
 * mainloop documents, loads 28 halfwords from tab into d0-d6, and jumps to
 * the convolve1_R routine for the first radius in TUNED_LIST1 that is >= r
 * (convolve1_25 if none is).
 *
 * The push (10 words) and vpush (8 doublewords) move sp down by 104 bytes,
 * so stack argument [sp,#n] of the prototype is read at [sp,#104+n].
 */
ENTRY(rsdIntrinsicBlurU1_K)
            push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
            vpush       {d8-d15}
            ldr         r5, [sp,#120]       /* r */
            ldr         r8, [sp,#108]       /* x */
            ldr         r6, [sp,#112]       /* y */
            sub         r9, r2, r8          /* w - x */
            sub         r7, r3, r6          /* h - y */
            ldr         r2, [sp,#104]       /* p */
            ldr         r3, [sp,#116]       /* count */
            sub         r9, r9, r3          /* w - x - count */
            sub         r7, r7, #1          /* h - y - 1 */

            ldr         r12, [sp,#124]      /* tab */

            add         r1, r1, r8          /* in += x */

            /* Clamp all four margins to r (unsigned minimum):
             * r6 = rup, r7 = rdn, r8 = left margin, r9 = right margin.
             */
            cmp         r6, r5
            movhi       r6, r5
            cmp         r7, r5
            movhi       r7, r5
            cmp         r8, r5
            movhi       r8, r5
            cmp         r9, r5
            movhi       r9, r5

            /* r4 = inlen = left margin + right margin + count */
            add         r4, r8, r9
            add         r4, r4, r3

            vld1.u16    {d0,d1,d2,d3}, [r12]!
            vld1.u16    {d4,d5,d6}, [r12]!

            /* The convolve routines are entered with a plain branch, so the
             * return address is set up by hand.
             */
            adr         lr, 1f
  .irep r, TUNED_LIST1
            cmp         r5, #\r
            bls         convolve1_\r
  .endr
            b           convolve1_25

1:          vpop        {d8-d15}
            pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
END(rsdIntrinsicBlurU1_K)

/* void rsdIntrinsicBlurU4_K(
 *                  void *out,      // r0
 *                  void *in,       // r1
 *                  size_t w,       // r2
 *                  size_t h,       // r3
 *                  size_t p,       // [sp]
 *                  size_t x,       // [sp,#4]
 *                  size_t y,       // [sp,#8]
 *                  size_t count,   // [sp,#12]
 *                  size_t r,       // [sp,#16]
 *                  uint16_t *tab); // [sp,#20]
 *
 * Entry point for the four-bytes-per-unit case.  Identical to
 * rsdIntrinsicBlurU1_K except that the source offset, count and inlen are
 * scaled by four before dispatching to convolve4_R; the margins in r6-r9
 * stay unscaled.
 *
 * The push (10 words) and vpush (8 doublewords) move sp down by 104 bytes,
 * so stack argument [sp,#n] of the prototype is read at [sp,#104+n].
 */
ENTRY(rsdIntrinsicBlurU4_K)
            push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
            vpush       {d8-d15}
            ldr         r5, [sp,#120]       /* r */
            ldr         r8, [sp,#108]       /* x */
            ldr         r6, [sp,#112]       /* y */
            sub         r9, r2, r8          /* w - x */
            sub         r7, r3, r6          /* h - y */
            ldr         r2, [sp,#104]       /* p */
            ldr         r3, [sp,#116]       /* count */
            sub         r9, r9, r3          /* w - x - count */
            sub         r7, r7, #1          /* h - y - 1 */

            ldr         r12, [sp,#124]      /* tab */

            add         r1, r1, r8, LSL #2  /* in += 4 * x */

            /* Clamp all four margins to r (unsigned minimum):
             * r6 = rup, r7 = rdn, r8 = left margin, r9 = right margin.
             */
            cmp         r6, r5
            movhi       r6, r5
            cmp         r7, r5
            movhi       r7, r5
            cmp         r8, r5
            movhi       r8, r5
            cmp         r9, r5
            movhi       r9, r5

            /* r3 = 4 * count; r4 = inlen = 4 * (left + right) + 4 * count */
            mov         r3, r3, LSL #2
            add         r4, r8, r9
            add         r4, r3, r4, LSL #2

            vld1.u16    {d0,d1,d2,d3}, [r12]!
            vld1.u16    {d4,d5,d6}, [r12]!

            /* The convolve routines are entered with a plain branch, so the
             * return address is set up by hand.
             */
            adr         lr, 1f
  .irep r, TUNED_LIST4
            cmp         r5, #\r
            bls         convolve4_\r
  .endr
            b           convolve4_25

1:          vpop        {d8-d15}
            pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
END(rsdIntrinsicBlurU4_K)