/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ENTRY(f) switches to .text, aligns, exports f as a function symbol and
 * opens an unwind region (.fnstart); END(f) closes that region (.fnend) and
 * records the size of f.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
#define END(f) .fnend; .size f, .-f;

.eabi_attribute 25,1 @Tag_ABI_align8_preserved
.arm

/* Number of fractional bits to preserve in intermediate results.  The
 * intermediate storage is 16-bit, and we started with 8 bit data (the integer
 * part), so this should be between 0 and 8.
 */
.set FRACTION_BITS, 7

/* Default upper bound on the convolution radius; used as the default max_r of
 * the fetch macro.
 */
.set MAX_R, 25


/* A quick way of making a line of code conditional on some other condition.
 * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with
 * `ifcc`:
 *
 * The whole remainder of the line is taken as a single vararg argument and is
 * emitted only if the symbol cc is non-zero at the point of expansion.
 */
.macro ifcc zzz:vararg
.if cc
            \zzz
.endif
.endm

/* Fetch 16 columns of bytes (regardless of image format), convolve these
 * vertically, and leave them in the register file.  If working near the top or
 * bottom of an image then clamp the addressing while loading the data in.
 *
 * The convolution is fully unrolled for windows up to max_r, with the
 * outermost edges calculated first.  This way it's possible to branch directly
 * into the relevant part of the code for an arbitrary convolution radius.  Two
 * variants of the loop are produced; one eliminates the clamping code for a
 * slight speed advantage.
 *
 * Where the macro is called with reg=x, the specified register is taken to
 * contain a pre-calculated pointer into one of the two loops.
*
 * Input:
 *      r1 -- src
 *      r2 -- pitch
 *      r5 -- r
 *      r6 -- rup
 *      r7 -- rdn
 *      r12 -- switch index
 *      q0-q3 -- coefficient table
 * Output:
 *      r1 += 16
 *      q10,q11 -- 16 convolved columns
 * Modifies:
 *      r10 = upper row pointer
 *      r11 = lower row pointer
 *      q12-q15 = temporary sums
 *
 * Layout note: with the clamping code enabled each tap assembles to 16
 * instructions (64 bytes in ARM state; the nop is padding), which is what the
 * `r5, LSL #6` adjustment of the branch target relies on.  The unclamped loop
 * drops the ifcc lines, so its taps are shorter (12 instructions); a caller
 * that passes its own reg has to allow for that.  Do not add or remove
 * instructions inside a tap without revisiting that arithmetic.
 */
.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=r12 /*{{{*/
  /* With the default reg (r12) the branch target is computed here, so the
   * ifcc-prefixed set-up lines in the preamble are enabled.  With any other
   * reg the target is taken as already computed and those lines are dropped.
   */
  .ifc \reg,r12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif

            /* Centre row: load 16 bytes and widen to 16 bits in q14,q15. */
            vld1.8      {d30,d31}, [r1]
            mls         r10, r2, r6, r1     /* r10 = src - pitch * rup */

            vmovl.u8    q14, d30
            pld         [r1, #32]
            vmovl.u8    q15, d31
  /* Point reg at the end of the clamped loop (local label 1 below).  The
   * short form uses adr; the long form loads a pc-relative offset from the
   * literal at 2: and adds pc (which reads as 1b + 8, hence the -8 in the
   * literal).  Presumably adr cannot reach the label for larger max_r.
   */
  .if \max_r < 16 // approximate
    ifcc    adr         \reg, 1f
  .else
    ifcc    ldr         \reg, 2f
1:  ifcc    add         \reg, \reg, pc
  .endif

            /* Centre tap, interleaved with the remaining address set-up. */
            vmull.u16   q12, d28, d0[0]
    ifcc    sub         \reg, r5, LSL #6    /* step back r taps of 64 bytes */
            vmull.u16   q13, d29, d0[0]
            mla         r11, r2, r7, r1     /* r11 = src + pitch * rdn */
            vmull.u16   q14, d30, d0[0]
            add         r1, r1, #16
            vmull.u16   q15, d31, d0[0]
            bx          \reg

    ifcc    .align 2
    2:  ifcc    .word       1f-1b-8

  /* Emit the unrolled taps twice: first with row clamping (rowclamp = 1),
   * then without (rowclamp = 0).  Taps run from i = max_r down to 1, so
   * entering part-way through skips the taps beyond the requested radius.
   */
  .irp rowclamp, 1, 0
  .set cc, \rowclamp
  .align 4
    .irp dreg, 6, 5, 4, 3, 2, 1, 0 ; .irp lane, 3, 2, 1, 0
      .set i, \dreg * 4 + \lane
      .if 0 < i && i <= \max_r
        .if \rowclamp
            vld1.8      {d20,d21}, [r10]
            vld1.8      {d22,d23}, [r11]
            cmp         r6, #i              /* flags for addhs below */
        .else
            vld1.8      {d20,d21}, [r10], r2
            vld1.8      {d22,d23}, [r11]
            sub         r11, r11, r2
        .endif
            /* Pair up the halves so each vaddl sums upper + lower row. */
            vswp        d21, d22
            pld         [r10, #32]
            vaddl.u8    q10, d20, d21
    ifcc    addhs       r10, r10, r2        /* advance only if rup >= i */
            vaddl.u8    q11, d22, d23
    ifcc    cmp         r7, #i
            vmlal.u16   q12, d20, d\dreg[\lane]
            pld         [r11, #32]
            vmlal.u16   q13, d21, d\dreg[\lane]
    ifcc    subhs       r11, r11, r2        /* step back only if rdn >= i */
            vmlal.u16   q14, d22, d\dreg[\lane]
    ifcc    nop
            vmlal.u16   q15, d23, d\dreg[\lane]
      .endif
    .endr ; .endr
    .if \rowclamp == 1
    1: \labelc :
            b           2f                  /* skip over the unclamped loop */
    .else
    2: \labelnc :
    .endif
  .endr

            /* Narrow the 32-bit sums to 16 bits, keeping FRACTION_BITS. */
            vqrshrn.u32 d20, q12, #16 - FRACTION_BITS
            vqrshrn.u32 d21, q13, #16 - FRACTION_BITS
            vqrshrn.u32 d22, q14, #16 - FRACTION_BITS
            vqrshrn.u32 d23, q15, #16 - FRACTION_BITS
.endm /*}}}*/

/* Some portion of the convolution window (as much as will fit, and all of it
 * for the uchar1 cases) is kept in the register file to avoid unnecessary
 * memory accesses.  This forces the horizontal loops to be unrolled because
 * there's no indexed addressing into the register file.
 *
 * As in the fetch macro, the operations are ordered from outside to inside, so
 * that jumping into the middle of the block bypasses the unwanted window taps.
 *
 * There are several variants of the macro because of the fixed offsets of the
 * taps -- the wider the maximum radius the further the centre tap is from the
 * most recently fetched data.
 * This means that pre-filling the window requires
 * more data that won't be used and it means that rotating the window involves
 * more mov operations.
 *
 * When the buffer gets too big the buffer at [r9] is used.
 *
 * Input:
 *      q4-q11 -- convolution window
 *      r9 -- pointer to additional convolution window data
 * Output:
 *      r9 -- updated buffer pointer (if used)
 *      d31 -- result to be stored
 * Modifies:
 *      r12 -- temp buffer pointer
 *      q12-q13 -- temporaries for load and vext operations.
*      q14-q15 -- intermediate sums
 */
/* NOTE(review): presumably the list of radii for which tuned hconv1_*
 * variants exist; its use is outside this section -- confirm.
 */
#define TUNED_LIST1 8, 16

/* hconv1_8: horizontal pass for radii up to 8.
 *
 * The centre tap is q9 (d18,d19); taps at distance k either side are built
 * with vext from q8..q10 and share coefficient k.  The code is entered at tap
 * r5 through the offset table at 100:, so taps beyond the radius are skipped.
 *
 * Dispatch: in ARM state pc reads as the instruction address + 8, so the ldr
 * fetches table entry (r5 - 1) and `add pc, pc, r12` is relative to 100:.
 * The bkpt occupies the slot that r5 == 0 would index; r5 is presumably
 * always in 1..8 here -- confirm against the callers.
 *
 * On exit d31 holds 8 result bytes and the window has been moved along by one
 * q register (q8 <- q9 <- q10 <- q11).
 */
.macro hconv1_8/*{{{*/
            vmull.u16   q14, d18, d0[0]
            vmull.u16   q15, d19, d0[0]

            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
    108:    vmlal.u16   q14, d16, d2[0]
            vmlal.u16   q15, d17, d2[0]
            vmlal.u16   q14, d20, d2[0]
            vmlal.u16   q15, d21, d2[0]
    107:    vext.u16    q12, q8, q9, #1
            vext.u16    q13, q9, q10, #7
            vmlal.u16   q14, d24, d1[3]
            vmlal.u16   q15, d25, d1[3]
            vmlal.u16   q14, d26, d1[3]
            vmlal.u16   q15, d27, d1[3]
    106:    vext.u16    q12, q8, q9, #2
            vext.u16    q13, q9, q10, #6
            vmlal.u16   q14, d24, d1[2]
            vmlal.u16   q15, d25, d1[2]
            vmlal.u16   q14, d26, d1[2]
            vmlal.u16   q15, d27, d1[2]
    105:    vext.u16    q12, q8, q9, #3
            vext.u16    q13, q9, q10, #5
            vmlal.u16   q14, d24, d1[1]
            vmlal.u16   q15, d25, d1[1]
            vmlal.u16   q14, d26, d1[1]
            vmlal.u16   q15, d27, d1[1]
    104:    //vext.u16    q12, q8, q9, #4
            //vext.u16    q13, q9, q10, #4
            vmlal.u16   q14, d17, d1[0]
            vmlal.u16   q15, d18, d1[0]
            vmlal.u16   q14, d19, d1[0]
            vmlal.u16   q15, d20, d1[0]
    103:    vext.u16    q12, q8, q9, #5
            vext.u16    q13, q9, q10, #3
            vmlal.u16   q14, d24, d0[3]
            vmlal.u16   q15, d25, d0[3]
            vmlal.u16   q14, d26, d0[3]
            vmlal.u16   q15, d27, d0[3]
    102:    vext.u16    q12, q8, q9, #6
            vext.u16    q13, q9, q10, #2
            vmlal.u16   q14, d24, d0[2]
            vmlal.u16   q15, d25, d0[2]
            vmlal.u16   q14, d26, d0[2]
            vmlal.u16   q15, d27, d0[2]
    101:    vext.u16    q12, q8, q9, #7
            vext.u16    q13, q9, q10, #1
            vmlal.u16   q14, d24, d0[1]
            vmlal.u16   q15, d25, d0[1]
            vmlal.u16   q14, d26, d0[1]
            vmlal.u16   q15, d27, d0[1]

            /* 32-bit sums -> 16 bits -> 8 bits, dropping FRACTION_BITS last. */
            vqrshrn.u32 d28, q14, #16
            vqrshrn.u32 d29, q15, #16
            vqrshrn.u16 d31, q14, #FRACTION_BITS

            /* Rotate the window ready for the next 8 columns. */
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/

/* hconv1_16: as hconv1_8 but for radii up to 16.
 *
 * The centre tap is q8 (d16,d17) and the taps are drawn from q6..q10.  Entry
 * is through the same style of offset table, indexed by r5 (presumably 1..16
 * -- confirm against the callers).  On exit d31 holds 8 result bytes and the
 * window has been moved along by one q register (q6 <- q7 <- ... <- q11).
 */
.macro hconv1_16/*{{{*/
            vmull.u16   q14, d16, d0[0]
            vmull.u16   q15, d17, d0[0]

            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            .word 109f-100b
            .word 110f-100b
            .word 111f-100b
            .word 112f-100b
            .word 113f-100b
            .word 114f-100b
            .word 115f-100b
            .word 116f-100b
    116:    //vext.u16    q12, q6, q7, #0
            //vext.u16    q13, q10, q11, #0
            vmlal.u16   q14, d12, d4[0]
            vmlal.u16   q15, d13, d4[0]
            vmlal.u16   q14, d20, d4[0]
            vmlal.u16   q15, d21, d4[0]
    115:    vext.u16    q12, q6, q7, #1
            vext.u16    q13, q9, q10, #7
            vmlal.u16   q14, d24, d3[3]
            vmlal.u16   q15, d25, d3[3]
            vmlal.u16   q14, d26, d3[3]
            vmlal.u16   q15, d27, d3[3]
    114:    vext.u16    q12, q6, q7, #2
            vext.u16    q13, q9, q10, #6
            vmlal.u16   q14, d24, d3[2]
            vmlal.u16   q15, d25, d3[2]
            vmlal.u16   q14, d26, d3[2]
            vmlal.u16   q15, d27, d3[2]
    113:    vext.u16    q12, q6, q7, #3
            vext.u16    q13, q9, q10, #5
            vmlal.u16   q14, d24, d3[1]
            vmlal.u16   q15, d25, d3[1]
            vmlal.u16   q14, d26, d3[1]
            vmlal.u16   q15, d27, d3[1]
    112:    //vext.u16    q12, q6, q7, #4
            //vext.u16    q13, q9, q10, #4
            vmlal.u16   q14, d13, d3[0]
            vmlal.u16   q15, d14, d3[0]
            vmlal.u16   q14, d19, d3[0]
            vmlal.u16   q15, d20, d3[0]
    111:    vext.u16    q12, q6, q7, #5
            vext.u16    q13, q9, q10, #3
            vmlal.u16   q14, d24, d2[3]
            vmlal.u16   q15, d25, d2[3]
            vmlal.u16   q14, d26, d2[3]
            vmlal.u16   q15, d27, d2[3]
    110:    vext.u16    q12, q6, q7, #6
            vext.u16    q13, q9, q10, #2
            vmlal.u16   q14, d24, d2[2]
            vmlal.u16   q15, d25, d2[2]
            vmlal.u16   q14, d26, d2[2]
            vmlal.u16   q15, d27, d2[2]
    109:    vext.u16    q12, q6, q7, #7
            vext.u16    q13, q9, q10, #1
            vmlal.u16   q14, d24, d2[1]
            vmlal.u16   q15, d25, d2[1]
            vmlal.u16   q14, d26, d2[1]
            vmlal.u16   q15, d27, d2[1]
    108:    //vext.u16    q12, q7, q8, #0
            //vext.u16    q13, q9, q10, #0
            vmlal.u16   q14, d14, d2[0]
            vmlal.u16   q15, d15, d2[0]
            vmlal.u16   q14, d18, d2[0]
            vmlal.u16   q15, d19, d2[0]
    107:    vext.u16    q12, q7, q8, #1
            vext.u16    q13, q8, q9, #7
            vmlal.u16   q14, d24, d1[3]
            vmlal.u16   q15, d25, d1[3]
            vmlal.u16   q14, d26, d1[3]
            vmlal.u16   q15, d27, d1[3]
    106:    vext.u16    q12, q7, q8, #2
            vext.u16    q13, q8, q9, #6
            vmlal.u16   q14, d24, d1[2]
            vmlal.u16   q15, d25, d1[2]
            vmlal.u16   q14, d26, d1[2]
            vmlal.u16   q15, d27, d1[2]
    105:    vext.u16    q12, q7, q8, #3
            vext.u16    q13, q8, q9, #5
            vmlal.u16   q14, d24, d1[1]
            vmlal.u16   q15, d25, d1[1]
            vmlal.u16   q14, d26, d1[1]
            vmlal.u16   q15, d27, d1[1]
    104:    //vext.u16    q12, q7, q8, #4
            //vext.u16    q13, q8, q9, #4
            vmlal.u16   q14, d15, d1[0]
            vmlal.u16   q15, d16, d1[0]
            vmlal.u16   q14, d17, d1[0]
            vmlal.u16   q15, d18, d1[0]
    103:    vext.u16    q12, q7, q8, #5
            vext.u16    q13, q8, q9, #3
            vmlal.u16   q14, d24, d0[3]
            vmlal.u16   q15, d25, d0[3]
            vmlal.u16   q14, d26, d0[3]
            vmlal.u16   q15, d27, d0[3]
    102:    vext.u16    q12, q7, q8, #6
            vext.u16    q13, q8, q9, #2
            vmlal.u16   q14, d24, d0[2]
            vmlal.u16   q15, d25, d0[2]
            vmlal.u16   q14, d26, d0[2]
            vmlal.u16   q15, d27, d0[2]
    101:    vext.u16    q12, q7, q8, #7
            vext.u16    q13, q8, q9, #1
            vmlal.u16   q14, d24, d0[1]
            vmlal.u16   q15, d25, d0[1]
            vmlal.u16   q14, d26, d0[1]
            vmlal.u16   q15, d27, d0[1]

            /* 32-bit sums -> 16 bits -> 8 bits, dropping FRACTION_BITS last. */
            vqrshrn.u32 d28, q14, #16
            vqrshrn.u32 d29, q15, #16
            vqrshrn.u16 d31, q14, #FRACTION_BITS

            /* Rotate the window ready for the next 8 columns. */
            vmov        q6, q7
            vmov        q7, q8
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/

.macro hconv1_25/*{{{*/
            vext.u16    q12, q6, q7, #7
            vmull.u16   q14, d24, d0[0]
            vmull.u16   q15, d25, d0[0]

            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            .word 109f-100b
            .word 110f-100b
            .word 111f-100b
            .word 112f-100b
            .word 113f-100b
            .word 114f-100b
            .word 115f-100b
396446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 116f-100b 397446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 117f-100b 398446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 118f-100b 399446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 119f-100b 400446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 120f-100b 401446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 121f-100b 402446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 122f-100b 403446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 123f-100b 404446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 124f-100b 405446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 125f-100b 406446788007efe0a673d0366284026adfa17b36fedSimon Hosie 125: vext.u16 q12, q3, q4, #6 407446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q10, q11, #0 408446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d6[1] 409446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d6[1] 410446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d6[1] 411446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d6[1] 412446788007efe0a673d0366284026adfa17b36fedSimon Hosie 124: vext.u16 q12, q3, q4, #7 413446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #7 414446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d6[0] 415446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d6[0] 416446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d6[0] 417446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d6[0] 418446788007efe0a673d0366284026adfa17b36fedSimon Hosie 123: vext.u16 q12, q4, q5, #0 419446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #6 420446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[3] 421446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
vmlal.u16 q15, d25, d5[3] 422446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d5[3] 423446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d5[3] 424446788007efe0a673d0366284026adfa17b36fedSimon Hosie 122: vext.u16 q12, q4, q5, #1 425446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #5 426446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[2] 427446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[2] 428446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d5[2] 429446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d5[2] 430446788007efe0a673d0366284026adfa17b36fedSimon Hosie 121: vext.u16 q12, q4, q5, #2 431446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #4 432446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[1] 433446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[1] 434446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d5[1] 435446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d5[1] 436446788007efe0a673d0366284026adfa17b36fedSimon Hosie 120: vext.u16 q12, q4, q5, #3 437446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #3 438446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[0] 439446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[0] 440446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d5[0] 441446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d5[0] 442446788007efe0a673d0366284026adfa17b36fedSimon Hosie 119: vext.u16 q12, q4, q5, #4 443446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #2 444446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[3] 445446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[3] 
446446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d4[3] 447446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d4[3] 448446788007efe0a673d0366284026adfa17b36fedSimon Hosie 118: vext.u16 q12, q4, q5, #5 449446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #1 450446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[2] 451446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[2] 452446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d4[2] 453446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d4[2] 454446788007efe0a673d0366284026adfa17b36fedSimon Hosie 117: vext.u16 q12, q4, q5, #6 455446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q9, q10, #0 456446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[1] 457446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[1] 458446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d4[1] 459446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d4[1] 460446788007efe0a673d0366284026adfa17b36fedSimon Hosie 116: vext.u16 q12, q4, q5, #7 461446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #7 462446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[0] 463446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[0] 464446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d4[0] 465446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d4[0] 466446788007efe0a673d0366284026adfa17b36fedSimon Hosie 115: vext.u16 q12, q5, q6, #0 467446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #6 468446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[3] 469446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[3] 
470446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[3] 471446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d3[3] 472446788007efe0a673d0366284026adfa17b36fedSimon Hosie 114: vext.u16 q12, q5, q6, #1 473446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #5 474446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[2] 475446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[2] 476446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[2] 477446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d3[2] 478446788007efe0a673d0366284026adfa17b36fedSimon Hosie 113: vext.u16 q12, q5, q6, #2 479446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #4 480446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[1] 481446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[1] 482446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[1] 483446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d3[1] 484446788007efe0a673d0366284026adfa17b36fedSimon Hosie 112: vext.u16 q12, q5, q6, #3 485446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #3 486446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[0] 487446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[0] 488446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[0] 489446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d3[0] 490446788007efe0a673d0366284026adfa17b36fedSimon Hosie 111: vext.u16 q12, q5, q6, #4 491446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #2 492446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[3] 493446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[3] 
494446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[3] 495446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[3] 496446788007efe0a673d0366284026adfa17b36fedSimon Hosie 110: vext.u16 q12, q5, q6, #5 497446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #1 498446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[2] 499446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[2] 500446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[2] 501446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[2] 502446788007efe0a673d0366284026adfa17b36fedSimon Hosie 109: vext.u16 q12, q5, q6, #6 503446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q8, q9, #0 504446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[1] 505446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[1] 506446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[1] 507446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[1] 508446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: vext.u16 q12, q5, q6, #7 509446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #7 510446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[0] 511446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[0] 512446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[0] 513446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[0] 514446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: vext.u16 q12, q6, q7, #0 515446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #6 516446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[3] 517446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[3] 
518446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[3] 519446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[3] 520446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: vext.u16 q12, q6, q7, #1 521446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #5 522446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[2] 523446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[2] 524446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[2] 525446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[2] 526446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: vext.u16 q12, q6, q7, #2 527446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #4 528446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[1] 529446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[1] 530446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[1] 531446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[1] 532446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: vext.u16 q12, q6, q7, #3 533446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #3 534446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[0] 535446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[0] 536446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[0] 537446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[0] 538446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: vext.u16 q12, q6, q7, #4 539446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #2 540446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[3] 541446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[3] 
542446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[3] 543446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[3] 544446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: vext.u16 q12, q6, q7, #5 545446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #1 546446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[2] 547446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[2] 548446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[2] 549446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[2] 550446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: vext.u16 q12, q6, q7, #6 551446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q13, q7, q8, #0 552446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[1] 553446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[1] 554446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[1] 555446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[1] 556446788007efe0a673d0366284026adfa17b36fedSimon Hosie 557446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d28, q14, #16 558446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d29, q15, #16 559446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u16 d31, q14, #FRACTION_BITS 560446788007efe0a673d0366284026adfa17b36fedSimon Hosie 561446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov d7, d9 562446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q4, q5 563446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q5, q6 564446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q6, q7 565446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q7, q8 566446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q8, q9 567446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q9, q10 
568446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q10, q11 569446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 570446788007efe0a673d0366284026adfa17b36fedSimon Hosie 571446788007efe0a673d0366284026adfa17b36fedSimon Hosie#define TUNED_LIST4 6, 12 572446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_6/*{{{*/ 573446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q14, d14, d0[0] 574446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q15, d15, d0[0] 575446788007efe0a673d0366284026adfa17b36fedSimon Hosie 576446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r12, [pc, r5, LSL #2] 577446788007efe0a673d0366284026adfa17b36fedSimon Hosie add pc, pc, r12 578446788007efe0a673d0366284026adfa17b36fedSimon Hosie bkpt 579446788007efe0a673d0366284026adfa17b36fedSimon Hosie 100: .word 101f-100b 580446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 102f-100b 581446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 103f-100b 582446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 104f-100b 583446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 105f-100b 584446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 106f-100b 585446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: vmlal.u16 q14, d8, d1[2] 586446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d9, d1[2] 587446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d20, d1[2] 588446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d21, d1[2] 589446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: vmlal.u16 q14, d9, d1[1] 590446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d10, d1[1] 591446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d19, d1[1] 592446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d20, d1[1] 593446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: vmlal.u16 q14, d10, d1[0] 594446788007efe0a673d0366284026adfa17b36fedSimon 
Hosie vmlal.u16 q15, d11, d1[0] 595446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d18, d1[0] 596446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d19, d1[0] 597446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: vmlal.u16 q14, d11, d0[3] 598446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d12, d0[3] 599446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d17, d0[3] 600446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d18, d0[3] 601446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: vmlal.u16 q14, d12, d0[2] 602446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d13, d0[2] 603446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d16, d0[2] 604446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d17, d0[2] 605446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: vmlal.u16 q14, d13, d0[1] 606446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d14, d0[1] 607446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d15, d0[1] 608446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d16, d0[1] 609446788007efe0a673d0366284026adfa17b36fedSimon Hosie 610446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d28, q14, #16 611446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d29, q15, #16 612446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u16 d31, q14, #FRACTION_BITS 613446788007efe0a673d0366284026adfa17b36fedSimon Hosie 614446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q4, q5 615446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q5, q6 616446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q6, q7 617446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q7, q8 618446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q8, q9 619446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q9, q10 620446788007efe0a673d0366284026adfa17b36fedSimon 
Hosie vmov q10, q11 621446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 622446788007efe0a673d0366284026adfa17b36fedSimon Hosie 623446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_12/*{{{*/ 624446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q14, d8, d0[0] 625446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q15, d9, d0[0] 626446788007efe0a673d0366284026adfa17b36fedSimon Hosie 627446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r12, [pc, r5, LSL #2] 628446788007efe0a673d0366284026adfa17b36fedSimon Hosie add pc, pc, r12 629446788007efe0a673d0366284026adfa17b36fedSimon Hosie bkpt 630446788007efe0a673d0366284026adfa17b36fedSimon Hosie 100: .word 101f-100b 631446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 102f-100b 632446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 103f-100b 633446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 104f-100b 634446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 105f-100b 635446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 106f-100b 636446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 107f-100b 637446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 108f-100b 638446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 109f-100b 639446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 110f-100b 640446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 111f-100b 641446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 112f-100b 642446788007efe0a673d0366284026adfa17b36fedSimon Hosie 112: add r12, r9, #0x1a0 643446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 644446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 645446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[0] 646446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[0] 647446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d20, d3[0] 
648446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d21, d3[0] 649446788007efe0a673d0366284026adfa17b36fedSimon Hosie 111: add r12, r9, #0x1a8 650446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 651446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 652446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 653446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 654446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[3] 655446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[3] 656446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d19, d2[3] 657446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d20, d2[3] 658446788007efe0a673d0366284026adfa17b36fedSimon Hosie 110: add r12, r9, #0x1b0 659446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 660446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 661446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[2] 662446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[2] 663446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d18, d2[2] 664446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d19, d2[2] 665446788007efe0a673d0366284026adfa17b36fedSimon Hosie 109: add r12, r9, #0x1b8 666446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 667446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
668446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 669446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 670446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[1] 671446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[1] 672446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d17, d2[1] 673446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d18, d2[1] 674446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: add r12, r9, #0x1c0 675446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 676446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 677446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[0] 678446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[0] 679446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d16, d2[0] 680446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d17, d2[0] 681446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: add r12, r9, #0x1c8 682446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 683446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
684446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 685446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 686446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[3] 687446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[3] 688446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d15, d1[3] 689446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d16, d1[3] 690446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: add r12, r9, #0x1d0 691446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 692446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 693446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[2] 694446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[2] 695446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d14, d1[2] 696446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d15, d1[2] 697446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: add r12, r9, #0x1d8 698446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 699446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
700446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 701446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 702446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[1] 703446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[1] 704446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d13, d1[1] 705446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d14, d1[1] 706446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: add r12, r9, #0x1e0 707446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 708446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 709446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[0] 710446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[0] 711446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d12, d1[0] 712446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d13, d1[0] 713446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: add r12, r9, #0x1e8 714446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 715446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
716446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 717446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 718446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[3] 719446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[3] 720446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d11, d0[3] 721446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d12, d0[3] 722446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: add r12, r9, #0x1f0 723446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 724446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 725446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[2] 726446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[2] 727446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d10, d0[2] 728446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d11, d0[2] 729446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: add r12, r9, #0x1f8 730446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 731446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64] 732446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[1] 733446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d8, d0[1] 734446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d9, d0[1] 735446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d10, d0[1] 736446788007efe0a673d0366284026adfa17b36fedSimon Hosie 737446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d28, q14, #16 738446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d29, q15, #16 739446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u16 d31, q14, #FRACTION_BITS 740446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
741446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u8 {q4}, [r9:128]! 742446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r9, r9, #0x200 743446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q4, q5 744446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q5, q6 745446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q6, q7 746446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q7, q8 747446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q8, q9 748446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q9, q10 749446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q10, q11 750446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/ 751446788007efe0a673d0366284026adfa17b36fedSimon Hosie 752446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_25/*{{{*/ 753446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x198 754446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 755446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
756446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 757446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12:64] 758446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q14, d24, d0[0] 759446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmull.u16 q15, d25, d0[0] 760446788007efe0a673d0366284026adfa17b36fedSimon Hosie 761446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r12, [pc, r5, LSL #2] 762446788007efe0a673d0366284026adfa17b36fedSimon Hosie add pc, pc, r12 763446788007efe0a673d0366284026adfa17b36fedSimon Hosie bkpt 764446788007efe0a673d0366284026adfa17b36fedSimon Hosie 100: .word 101f-100b 765446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 102f-100b 766446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 103f-100b 767446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 104f-100b 768446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 105f-100b 769446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 106f-100b 770446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 107f-100b 771446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 108f-100b 772446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 109f-100b 773446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 110f-100b 774446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 111f-100b 775446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 112f-100b 776446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 113f-100b 777446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 114f-100b 778446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 115f-100b 779446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 116f-100b 780446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 117f-100b 781446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 118f-100b 782446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 119f-100b 783446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 
120f-100b 784446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 121f-100b 785446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 122f-100b 786446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 123f-100b 787446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 124f-100b 788446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word 125f-100b 789446788007efe0a673d0366284026adfa17b36fedSimon Hosie 125: add r12, r9, #0x0d0 790446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 791446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 792446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d6[1] 793446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d6[1] 794446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d20, d6[1] 795446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d21, d6[1] 796446788007efe0a673d0366284026adfa17b36fedSimon Hosie 124: add r12, r9, #0x0d8 797446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 798446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
799446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 800446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 801446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d6[0] 802446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d6[0] 803446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d19, d6[0] 804446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d20, d6[0] 805446788007efe0a673d0366284026adfa17b36fedSimon Hosie 123: add r12, r9, #0x0e0 806446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 807446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 808446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[3] 809446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[3] 810446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d18, d5[3] 811446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d19, d5[3] 812446788007efe0a673d0366284026adfa17b36fedSimon Hosie 122: add r12, r9, #0x0e8 813446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 814446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
815446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 816446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 817446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[2] 818446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[2] 819446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d17, d5[2] 820446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d18, d5[2] 821446788007efe0a673d0366284026adfa17b36fedSimon Hosie 121: add r12, r9, #0x0f0 822446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 823446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 824446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[1] 825446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[1] 826446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d16, d5[1] 827446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d17, d5[1] 828446788007efe0a673d0366284026adfa17b36fedSimon Hosie 120: add r12, r9, #0x0f8 829446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 830446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
831446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 832446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 833446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d5[0] 834446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d5[0] 835446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d15, d5[0] 836446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d16, d5[0] 837446788007efe0a673d0366284026adfa17b36fedSimon Hosie 119: add r12, r9, #0x100 838446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 839446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 840446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[3] 841446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[3] 842446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d14, d4[3] 843446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d15, d4[3] 844446788007efe0a673d0366284026adfa17b36fedSimon Hosie 118: add r12, r9, #0x108 845446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 846446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
847446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 848446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 849446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[2] 850446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[2] 851446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d13, d4[2] 852446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d14, d4[2] 853446788007efe0a673d0366284026adfa17b36fedSimon Hosie 117: add r12, r9, #0x110 854446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 855446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 856446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[1] 857446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[1] 858446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d12, d4[1] 859446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d13, d4[1] 860446788007efe0a673d0366284026adfa17b36fedSimon Hosie 116: add r12, r9, #0x118 861446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 862446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
863446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 864446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 865446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d4[0] 866446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d4[0] 867446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d11, d4[0] 868446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d12, d4[0] 869446788007efe0a673d0366284026adfa17b36fedSimon Hosie 115: add r12, r9, #0x120 870446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 871446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 872446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[3] 873446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[3] 874446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d10, d3[3] 875446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d11, d3[3] 876446788007efe0a673d0366284026adfa17b36fedSimon Hosie 114: add r12, r9, #0x128 877446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 878446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 
879446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 880446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 881446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[2] 882446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[2] 883446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d9, d3[2] 884446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d10, d3[2] 885446788007efe0a673d0366284026adfa17b36fedSimon Hosie 113: add r12, r9, #0x130 886446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 887446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 888446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[1] 889446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[1] 890446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d8, d3[1] 891446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d9, d3[1] 892446788007efe0a673d0366284026adfa17b36fedSimon Hosie 112: add r12, r9, #0x138 893446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 894446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 895446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 896446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 897446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1f8 898446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 899446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64] 900446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d3[0] 901446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d3[0] 902446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d3[0] @ Could be d7, without the load, right? 
903446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d8, d3[0] 904446788007efe0a673d0366284026adfa17b36fedSimon Hosie 111: add r12, r9, #0x140 905446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 906446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 907446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1f0 908446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 909446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 910446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[3] 911446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[3] 912446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[3] 913446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[3] 914446788007efe0a673d0366284026adfa17b36fedSimon Hosie 110: add r12, r9, #0x148 915446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 916446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 917446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 918446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 919446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1e8 920446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 921446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
922446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 923446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 924446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[2] 925446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[2] 926446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[2] 927446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[2] 928446788007efe0a673d0366284026adfa17b36fedSimon Hosie 109: add r12, r9, #0x150 929446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 930446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 931446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1e0 932446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 933446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 934446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[1] 935446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[1] 936446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[1] 937446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[1] 938446788007efe0a673d0366284026adfa17b36fedSimon Hosie 108: add r12, r9, #0x158 939446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 940446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 941446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 942446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 943446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1d8 944446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 945446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
946446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 947446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 948446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d2[0] 949446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d2[0] 950446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d2[0] 951446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d2[0] 952446788007efe0a673d0366284026adfa17b36fedSimon Hosie 107: add r12, r9, #0x160 953446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 954446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 955446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1d0 956446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 957446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 958446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[3] 959446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[3] 960446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[3] 961446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[3] 962446788007efe0a673d0366284026adfa17b36fedSimon Hosie 106: add r12, r9, #0x168 963446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 964446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 965446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 966446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 967446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1c8 968446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 969446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
970446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 971446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 972446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[2] 973446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[2] 974446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[2] 975446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[2] 976446788007efe0a673d0366284026adfa17b36fedSimon Hosie 105: add r12, r9, #0x170 977446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 978446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 979446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1c0 980446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 981446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 982446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[1] 983446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[1] 984446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[1] 985446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[1] 986446788007efe0a673d0366284026adfa17b36fedSimon Hosie 104: add r12, r9, #0x178 987446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 988446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 989446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 990446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 991446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1b8 992446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 993446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
994446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 995446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 996446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d1[0] 997446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d1[0] 998446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d1[0] 999446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d1[0] 1000446788007efe0a673d0366284026adfa17b36fedSimon Hosie 103: add r12, r9, #0x180 1001446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1002446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128] 1003446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1b0 1004446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1005446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 1006446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[3] 1007446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[3] 1008446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[3] 1009446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[3] 1010446788007efe0a673d0366284026adfa17b36fedSimon Hosie 102: add r12, r9, #0x188 1011446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1012446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24}, [r12:64]! 1013446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1014446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d25}, [r12] 1015446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r12, r9, #0x1a8 1016446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1017446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26}, [r12:64]! 
1018446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1019446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d27}, [r12:64] 1020446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[2] 1021446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[2] 1022446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[2] 1023446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[2] 1024446788007efe0a673d0366284026adfa17b36fedSimon Hosie 101: add r12, r9, #0x190 1025446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1026446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d24,d25}, [r12:128]! 1027446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r12, r12, #0x200 1028446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d26,d27}, [r12:128] 1029446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d24, d0[1] 1030446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d25, d0[1] 1031446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q14, d26, d0[1] 1032446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmlal.u16 q15, d27, d0[1] 1033446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1034446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d28, q14, #16 1035446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u32 d29, q15, #16 1036446788007efe0a673d0366284026adfa17b36fedSimon Hosie vqrshrn.u16 d31, q14, #FRACTION_BITS 1037446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1038446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u8 {q4}, [r9:128]! 
/* Tail of the macro that starts above this view.  r9 has just been
 * post-incremented by the preceding store, so bit 9 is cleared again -- the
 * same bic that this macro applies to every address it derives from r9 --
 * and the register window then slides down by one quad (q4 <- q5, ...,
 * q10 <- q11).
 */
            bic         r9, r9, #0x200
            vmov        q4, q5
            vmov        q5, q6
            vmov        q6, q7
            vmov        q7, q8
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/

/* Dedicated function wrapper for the fetch macro, for the cases where
 * performance isn't that important, to keep code size down.
 *
 * The fetch macro is expanded with its default arguments.  r10 and r11 are
 * saved and restored around the expansion, so callers always see them
 * unchanged.  Every other register the fetch macro writes is visible to the
 * caller; the macro body is not part of this chunk, so check its definition
 * for the exact list.
 */
ENTRY(fetch_generic_asm)
            push        {r10,r11}
            fetch
            pop         {r10,r11}
            bx          lr
END(fetch_generic_asm)

/* Given values in q10 and q11, and an index in r11, sweep the (r11&15)th value
 * across to fill the rest of the register pair.  Used for filling the right
 * hand edge of the window when starting too close to the right hand edge of
 * the image.
 *
 * Input:
 *      r1 -- src (as used by the prefetch code that calls this)
 *      r11 -- index; only bits 0-3 of -r11 are examined
 *      q10, q11 -- register pair to patch
 * Output:
 *      q10, q11 -- patched pair
 *      q12 -- pad value, broadcast to all eight lanes
 *      r1 -- reduced by 2 * (-r11 & 15)
 *      r11 -- unchanged (negated on entry, negated back on exit)
 * Modifies:
 *      q13 (lane mask), flags
 *
 * NOTE(review): the pad value is lane 6 of the rotated copy, and the lanes
 * that vbif overwrites are the ones whose mask bytes were shifted out (the
 * low lanes of q11).  Confirm against the callers that this matches the
 * intended lane convention; it cannot be established from this chunk alone.
 */
ENTRY(prefetch_clamp1)
            rsb         r11, r11, #0            @ r11 = -r11 (restored before return)
            tst         r11, #8
            beq         1f
            vmov.u16    q11, q10                @ bit 3 set: q10 becomes the register being patched
            sub         r1, r1, #16
1:          vmov.u16    q12, q11                @ q12 = working copy, rotated below
            vmov.i8     d26, #0xff              @ d26 = byte mask, one byte per lane, all ones
            tst         r11, #4
            beq         1f
            vext.u16    q12, q12, q12, #4       @ rotate the copy by 4 lanes
            sub         r1, r1, #8
            vshl.u64    d26, d26, #32           @ shift the mask up by 4 bytes
1:          tst         r11, #2
            beq         1f
            vext.u16    q12, q12, q12, #6       @ rotate the copy by 6 lanes
            sub         r1, r1, #4
            vshl.u64    d26, d26, #16           @ shift the mask up by 2 bytes
1:          tst         r11, #1
            beq         1f
            vext.u16    q12, q12, q12, #7       @ rotate the copy by 7 lanes
            sub         r1, r1, #2
            vshl.u64    d26, d26, #8            @ shift the mask up by 1 byte
1:          vdup.u16    q12, d25[2]             @ broadcast lane 6 of the rotated copy
            vmovl.s8    q13, d26                @ widen the byte mask to 16-bit lanes
            vbif        q11, q12, q13           @ lanes with a zero mask take the pad value
            tst         r11, #8
            beq         1f
            vmov        q10, q11                @ bit 3 set: patched register goes back to q10
            vmov        q11, q12                @ and q11 becomes pad in every lane
1:          rsb         r11, r11, #0            @ restore r11
            bx          lr
END(prefetch_clamp1)

/* Counterpart of prefetch_clamp1 that works on whole d registers (four
 * 16-bit lanes at a time), so only bits 2 and 3 of -r11 are examined.  It is
 * the variant reached through "bl prefetch_clamp" with a step suffix of 4.
 *
 * Input:
 *      r1 -- src (as used by the prefetch code that calls this)
 *      r11 -- index; only bits 2-3 of -r11 are examined
 *      q10, q11 -- register pair to patch
 * Output:
 *      q10, q11 -- patched pair
 *      q12 -- pad value: the selected d register in both halves
 *      r1 -- reduced by 16 if bit 3 of -r11 is set, and by 8 if bit 2 is set
 *      r11 -- unchanged (negated on entry, negated back on exit)
 * Modifies:
 *      flags
 */
ENTRY(prefetch_clamp4)
            rsb         r11, r11, #0            @ r11 = -r11 (restored before return)
            tst         r11, #8
            beq         1f
            sub         r1, r1, #16
            vmov.u16    q11, q10                @ bit 3 set: q10 becomes the register being patched
1:          vmov        d24, d23                @ default pad source: high half of q11
            tst         r11, #4
            beq         1f
            vmov        d24, d22                @ bit 2 set: pad source is the low half instead
            sub         r1, r1, #8
            vmov        d23, d22                @ and the high half of q11 is overwritten with it
1:          vmov        d25, d24                @ q12 = pad source in both halves
            tst         r11, #8
            beq         1f
            vmov        q10, q11                @ bit 3 set: patched register goes back to q10
            vmov        q11, q12                @ and q11 becomes all pad
1:          rsb         r11, r11, #0            @ restore r11
            bx          lr
END(prefetch_clamp4)


/* Helpers for prefetch, below.
 */

/* prefetch_out: hand over one pair of registers produced while prefetching.
 *
 *   store > 0   write qsa then qsb to the buffer at r9, post-incrementing r9.
 *               When both names are the same register two single-register
 *               stores are emitted, since one register list cannot name the
 *               same register twice.
 *   store == 0  copy qsa into qa and qsb into qb.
 *   store < 0   copy only qsb_hi into qb; qa, qsa and qsb are ignored.
 */
.macro prefetch_out qa, qb, store, qsa, qsb, qsb_hi
  .if \store > 0
    .ifc \qsa,\qsb
            vst1.u16    {\qsa}, [r9:128]!
            vst1.u16    {\qsb}, [r9:128]!
    .else
            vst1.u16    {\qsa,\qsb}, [r9:256]!
    .endif
  .elseif \store == 0
            vmov.u16    \qa, \qsa
            vmov.u16    \qb, \qsb
  .else
            vmov.u16    \qb, \qsb_hi
  .endif
.endm

/* prefetch_one: fill one 16-column group of the window.
 *
 * The group starts at window index i = (need - 16) - rem, where need is the
 * symbol set by the prefetch macro.  Nothing is emitted when i is negative.
 * The c argument is accepted but not referenced in the body.
 *
 * Consecutive expansions are chained through the numeric labels; each label
 * is the entry point for the matching state of the previous expansion:
 *
 *   1:    still before the fill start.  If r10 >= i+16 the whole group is
 *         padding (q9), and the next group is entered at its 1: as well.
 *   2:    inside the data.  If r11 > i+16 the group is q10/q11 as fetched,
 *         another 16 columns are fetched, and the next group is entered at
 *         its 2: (the r10 test is not repeated).
 *   3:    the fill stop lies in this group.  prefetch_clamp patches q10/q11
 *         and leaves the pad value in q12; the next group is entered on its
 *         pad path.
 *   4:+4  pad path, entered from the previous expansion.  The group is all
 *         q12, and the next group is entered the same way.
 *
 * "4f+4" means one instruction past the next 4: label, which relies on every
 * instruction being four bytes (ARM state).  The final expansion (rem == 0)
 * supplies the landing labels for all four states; its nop exists so that
 * "4f+4" lands immediately after it.
 */
.macro prefetch_one  qa, qb, rem, c, store=0, step=1
.set i, (need - 16) - \rem
.if i >= 0
1:          cmp         r10, #i+16
            blo         2f
            prefetch_out \qa, \qb, \store, q9, q9, d19
            b           1f
2:          cmp         r11, #i+16
            bls         3f
            prefetch_out \qa, \qb, \store, q10, q11, d23
            bl          fetch_generic_asm
            b           2f
3:          bl          prefetch_clamp\step
            prefetch_out \qa, \qb, \store, q10, q11, d23
4:          b           4f+4
            @q12 contains pad word from prefetch_clamp call
            prefetch_out \qa, \qb, \store, q12, q12, d25
  .if \rem > 0
            b           4f+4
  .else
1:
2:
3:
4:          nop
  .endif
.endif
.endm

/* Fill the convolution window with context data.  The aim here is to load
 * exactly rlf + rrt columns, and in the main loop to read as many columns as
 * will be written.  This is complicated by the need to handle cases when the
 * input starts very close to the left or right (or both) edges of the image,
 * and where these do not fall on 16-byte boundaries.
 *
 * Input:
 *      r1 -- src
 *      r2 -- pitch
 *      r3 -- count
 *      r4 -- inlen
 *      r5 -- r
 *      r6 -- rup
 *      r7 -- rdn
 *      r8 -- rlf
 *      r9 -- buffer (if needed)
 * Output:
 *      r1 += rlf + min(count, rrt)
 *      r4 -- reduced by the scaled rlf plus max_r * step, clamped at zero
 * Modifies:
 *      r10 -- fill start index in the window
 *      r11 -- fill stop index in the window
 *      r12 -- scratch (not written directly here; the fetch helper may use
 *             it -- its body is outside this chunk)
 *
 * NOTE(review): whenever step is not 1, rlf is scaled with LSL #2, i.e. by
 * four.  That is only right for step == 4; confirm no caller passes another
 * value.
 */
.macro prefetch step=1, max_r=25
.set need, ((\max_r + \max_r) * \step + 15) & ~15
  .if \step == 1
            rsb         r10, r8, #need - (\max_r * \step)
  .else
            mov         r10, r8, LSL #2
            rsb         r10, r10, #need - (\max_r * \step)
  .endif
            add         r11, r10, r4
            cmp         r11, #need
            movhi       r11, #need

            bl          fetch_generic_asm
            /* q9 becomes the left-hand padding: the first fetched value
             * (step == 1) or the first four fetched lanes, repeated.
             */
  .if \step == 1
            vdup.u16    q9, d20[0]
  .else
            vmov.u16    d18, d20
            vmov.u16    d19, d20
  .endif
            /* If the fill start is not on a 16-lane boundary, move the data
             * in q10/q11 up by (r10 & 15) lanes and let padding from q9 in
             * underneath.  Each vext pair below moves the window up by
             * (8 - imm) lanes, so immediates 4, 6 and 7 contribute 4, 2 and 1
             * lanes.  The steps are selected by the bits of r10 itself so
             * that they always add up to exactly r10 & 15, the same amount
             * by which r1 is rewound afterwards.  (Selecting immediates
             * 4, 2 and 1 from the bits of -r10 only adds up correctly when a
             * single step runs.)
             */
            tst         r10, #15
            beq         2f
            tst         r10, #8
            beq         1f
            vmov.u16    q11, q10
            vmov.u16    q10, q9
1:          tst         r10, #4
            beq         1f
            vext.u16    q11, q10, q11, #4
            vext.u16    q10, q9, q10, #4
  .if \step == 1
  1:        tst         r10, #2
            beq         1f
            vext.u16    q11, q10, q11, #6
            vext.u16    q10, q9, q10, #6
  1:        tst         r10, #1
            beq         1f
            vext.u16    q11, q10, q11, #7
            vext.u16    q10, q9, q10, #7
  .endif
            /* Rewind r1 by (r10 & 15) and round r10 down to a multiple of 16. */
1:          sub         r1, r1, r10
            bic         r10, r10, #15
            add         r1, r1, r10
2:
  .if \step > 1
            /* it's only in the uchar2 and uchar4 cases where the register file
             * is insufficient (given MAX_R <= 25).
             */
            prefetch_one xx, xx, 192, c=\max_r, step=\step, store=1
            prefetch_one xx, xx, 176, c=\max_r, step=\step, store=1
            prefetch_one xx, xx, 160, c=\max_r, step=\step, store=1
            prefetch_one xx, xx, 144, c=\max_r, step=\step, store=1
            prefetch_one xx, xx, 128, c=\max_r, step=\step, store=1
            prefetch_one xx, xx, 112, c=\max_r, step=\step, store=1
            prefetch_one xx, xx,  96, c=\max_r, step=\step, store=1
            prefetch_one xx, xx,  80, c=\max_r, step=\step, store=1
            prefetch_one xx, xx,  64, c=\max_r, step=\step, store=1
            prefetch_one xx, xx,  48, c=\max_r, step=\step, store=1
  .else
            /* q3 normally contains the coefficient table, but it's not fully
             * used.  In the uchar1, r=25 case the other half of q3 is used for
             * the last two window taps to avoid falling out to memory.
             */
            prefetch_one xx, d7,  48, c=\max_r, step=\step, store=-1
  .endif
            prefetch_one q4, q5,  32, c=\max_r, step=\step, store=0
            prefetch_one q6, q7,  16, c=\max_r, step=\step, store=0
            prefetch_one q8, q9,   0, c=\max_r, step=\step, store=0

            /* Deduct the columns accounted for above from inlen (r4),
             * clamping at zero.
             */
  .if \step == 1
            add         r10, r8, #\max_r * \step
  .else
            mov         r10, r8, LSL #2
            add         r10, r10, #\max_r * \step
  .endif
            subs        r4, r4, r10
            movlo       r4, #0
.endm

/* The main loop.
1269446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 1270446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input: 1271446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r0 = dst 1272446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r1 = src 1273446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r2 = pitch 1274446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r3 = count 1275446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r4 = inlen 1276446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r5 = r 1277446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r6 = rup 1278446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r7 = rdn 1279446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r9 = buffer 1280446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies 1281446788007efe0a673d0366284026adfa17b36fedSimon Hosie * r8 = fetch code pointer 1282446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1283446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro mainloop core, step=1, max_r=25, labelc="", labelnc="" 1284446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r8, 3f 1285446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: add r8, r8, pc 1286446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r8, r5, LSL #5 1287446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r8, r5, LSL #4 1288446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r5, r6 1289446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmpeq r5, r7 1290446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 5f 1291446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1292446788007efe0a673d0366284026adfa17b36fedSimon Hosie /* if (r != rup || r != rdn) then the address-clamping table should 1293446788007efe0a673d0366284026adfa17b36fedSimon Hosie * be used rather than the short-cut version. 
1294446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1295446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r8, 3f+4 1296446788007efe0a673d0366284026adfa17b36fedSimon Hosie2: add r8, r8, pc 1297446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r8, r5, LSL #6 1298446788007efe0a673d0366284026adfa17b36fedSimon Hosie b 5f 1299446788007efe0a673d0366284026adfa17b36fedSimon Hosie .align 3 1300446788007efe0a673d0366284026adfa17b36fedSimon Hosie3: .word \labelnc-1b-8 1301446788007efe0a673d0366284026adfa17b36fedSimon Hosie .word \labelc-2b-8 1302446788007efe0a673d0366284026adfa17b36fedSimon Hosie .align 4 1303446788007efe0a673d0366284026adfa17b36fedSimon Hosie3: fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=r8 1304446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1305446788007efe0a673d0366284026adfa17b36fedSimon Hosie /* For each call to fetch two are made to \core. It would be 1306446788007efe0a673d0366284026adfa17b36fedSimon Hosie * preferable to have twice the work done in \core, but the 1307446788007efe0a673d0366284026adfa17b36fedSimon Hosie * register file is too small for this to be straightforward. 1308446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1309446788007efe0a673d0366284026adfa17b36fedSimon Hosie \core 1310446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u8 {d31}, [r0]! 1311446788007efe0a673d0366284026adfa17b36fedSimon Hosie \core 1312446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u8 {d31}, [r0]! 
1313446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1314446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r3, r3, #16 1315446788007efe0a673d0366284026adfa17b36fedSimon Hosie5: subs r4, r4, #16 1316446788007efe0a673d0366284026adfa17b36fedSimon Hosie bhs 3b 1317446788007efe0a673d0366284026adfa17b36fedSimon Hosie adds r4, r4, #16 1318446788007efe0a673d0366284026adfa17b36fedSimon Hosie bne 1f 1319446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \step==1 1320446788007efe0a673d0366284026adfa17b36fedSimon Hosie vdup.u16 q10, d19[3] 1321446788007efe0a673d0366284026adfa17b36fedSimon Hosie vdup.u16 q11, d19[3] 1322446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1323446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u64 d20, d19 1324446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u64 d21, d19 1325446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u64 d22, d19 1326446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u64 d23, d19 1327446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 1328446788007efe0a673d0366284026adfa17b36fedSimon Hosie b 4f 1329446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1330446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: sub r1, r1, #16 1331446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r1, r1, r4 1332446788007efe0a673d0366284026adfa17b36fedSimon Hosie bl fetch_generic_asm 1333446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1334446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \step==1 1335446788007efe0a673d0366284026adfa17b36fedSimon Hosie vdup.u16 q12, d23[3] 1336446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1337446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u64 d24, d23 1338446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u64 d25, d23 1339446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 1340446788007efe0a673d0366284026adfa17b36fedSimon Hosie rsb r4, r4, #0 1341446788007efe0a673d0366284026adfa17b36fedSimon Hosie tst r4, #8 
1342446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 1f 1343446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q10, q11 1344446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov q11, q12 1345446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: tst r4, #4 1346446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 1f 1347446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q10, q10, q11, #4 1348446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q11, q11, q12, #4 1349446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: tst r4, #2 1350446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 1f 1351446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q10, q10, q11, #2 1352446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q11, q11, q12, #2 1353446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: tst r4, #1 1354446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 4f 1355446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q10, q10, q11, #1 1356446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u16 q11, q11, q12, #1 1357446788007efe0a673d0366284026adfa17b36fedSimon Hosie4: cmp r3, #0 1358446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 5f 1359446788007efe0a673d0366284026adfa17b36fedSimon Hosie3: \core 1360446788007efe0a673d0366284026adfa17b36fedSimon Hosie .if \step==1 1361446788007efe0a673d0366284026adfa17b36fedSimon Hosie vdup.u16 q11, d23[3] 1362446788007efe0a673d0366284026adfa17b36fedSimon Hosie .else 1363446788007efe0a673d0366284026adfa17b36fedSimon Hosie vmov.u64 d22, d23 1364446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endif 1365446788007efe0a673d0366284026adfa17b36fedSimon Hosie subs r3, r3, #8 1366446788007efe0a673d0366284026adfa17b36fedSimon Hosie blo 4f 1367446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u8 {d31}, [r0]! 
1368446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 5f 1369446788007efe0a673d0366284026adfa17b36fedSimon Hosie b 3b 1370446788007efe0a673d0366284026adfa17b36fedSimon Hosie4: tst r3, #4 1371446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 1f 1372446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u32 {d31[0]}, [r0]! 1373446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u8 d31, d31, d31, #4 1374446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: tst r3, #2 1375446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 1f 1376446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u16 {d31[0]}, [r0]! 1377446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u8 d31, d31, d31, #2 1378446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: tst r3, #1 1379446788007efe0a673d0366284026adfa17b36fedSimon Hosie beq 5f 1380446788007efe0a673d0366284026adfa17b36fedSimon Hosie vst1.u8 {d31[0]}, [r0]! 1381446788007efe0a673d0366284026adfa17b36fedSimon Hosie vext.u8 d31, d31, d31, #1 1382446788007efe0a673d0366284026adfa17b36fedSimon Hosie5: nop 1383446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm 1384446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1385446788007efe0a673d0366284026adfa17b36fedSimon Hosie.irep r, TUNED_LIST1, 25 1386446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(convolve1_\r) 1387446788007efe0a673d0366284026adfa17b36fedSimon Hosie push {r12,lr} 1388446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1389446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r1, r1, r8 1390446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1391446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch step=1, max_r=\r 1392446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1393446788007efe0a673d0366284026adfa17b36fedSimon Hosie mainloop core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r 1394446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1395446788007efe0a673d0366284026adfa17b36fedSimon Hosie pop {r12,pc} 
1396446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(convolve1_\r) 1397446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endr 1398446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1399446788007efe0a673d0366284026adfa17b36fedSimon Hosie.irep r, TUNED_LIST4, 25 1400446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(convolve4_\r) 1401446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r12, sp, #0x200 1402446788007efe0a673d0366284026adfa17b36fedSimon Hosie bic r9, r12, #0x3fc 1403446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov sp, r9 1404446788007efe0a673d0366284026adfa17b36fedSimon Hosie push {r12,lr} 1405446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1406446788007efe0a673d0366284026adfa17b36fedSimon Hosie /* r9 now points to a buffer on the stack whose address has the low 1407446788007efe0a673d0366284026adfa17b36fedSimon Hosie * 10 bits clear. This allows easy address calculation in the 1408446788007efe0a673d0366284026adfa17b36fedSimon Hosie * wrap-around cases. 
1409446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1410446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1411446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r1, r1, r8, LSL #2 1412446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1413446788007efe0a673d0366284026adfa17b36fedSimon Hosie prefetch step=4, max_r=\r 1414446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1415446788007efe0a673d0366284026adfa17b36fedSimon Hosie mainloop core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r 1416446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1417446788007efe0a673d0366284026adfa17b36fedSimon Hosie pop {r12,lr} 1418446788007efe0a673d0366284026adfa17b36fedSimon Hosie add sp, r12, #0x200 1419446788007efe0a673d0366284026adfa17b36fedSimon Hosie bx lr 1420446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(convolve4_\r) 1421446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endr 1422446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1423446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* void rsdIntrinsicBlurU1_K( 1424446788007efe0a673d0366284026adfa17b36fedSimon Hosie * void *out, // r0 1425446788007efe0a673d0366284026adfa17b36fedSimon Hosie * void *in, // r1 1426446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t w, // r2 1427446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t h, // r3 1428446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t p, // [sp] 1429446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t x, // [sp,#4] 1430446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t y, // [sp,#8] 1431446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t count, // [sp,#12] 1432446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t r, // [sp,#16] 1433446788007efe0a673d0366284026adfa17b36fedSimon Hosie * uint16_t *tab); // [sp,#20] 1434446788007efe0a673d0366284026adfa17b36fedSimon Hosie */ 1435446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(rsdIntrinsicBlurU1_K) 
1436446788007efe0a673d0366284026adfa17b36fedSimon Hosie push {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} 1437446788007efe0a673d0366284026adfa17b36fedSimon Hosie vpush {d8-d15} 1438446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r5, [sp,#120] 1439446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r8, [sp,#108] 1440446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r6, [sp,#112] 1441446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r9, r2, r8 1442446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r7, r3, r6 1443446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r2, [sp,#104] 1444446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r3, [sp,#116] 1445446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r9, r9, r3 1446446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r7, r7, #1 1447446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1448446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r12, [sp,#124] 1449446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1450446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r0, r0, r8 @, LSL #2 /* for blur4 option */ 1451446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r1, r1, r8 @, LSL #2 /* for blur4 option */ 1452446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1453446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r6, r5 1454446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r6, r5 1455446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r7, r5 1456446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r7, r5 1457446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r8, r5 1458446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r8, r5 1459446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r9, r5 1460446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r9, r5 1461446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1462446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r4, r8, r9 1463446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
add r4, r4, r3 1464446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1465446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d0,d1,d2,d3}, [r12]! 1466446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d4,d5,d6}, [r12]! 1467446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1468446788007efe0a673d0366284026adfa17b36fedSimon Hosie adr lr, 1f 1469446788007efe0a673d0366284026adfa17b36fedSimon Hosie .irep r, TUNED_LIST1 1470446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r5, #\r 1471446788007efe0a673d0366284026adfa17b36fedSimon Hosie bls convolve1_\r 1472446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endr 1473446788007efe0a673d0366284026adfa17b36fedSimon Hosie b convolve1_25 1474446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1475446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: vpop {d8-d15} 1476446788007efe0a673d0366284026adfa17b36fedSimon Hosie pop {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} 1477446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(rsdIntrinsicBlurU1_K) 1478446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1479446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* void rsdIntrinsicBlurU4_K( 1480446788007efe0a673d0366284026adfa17b36fedSimon Hosie * void *out, // r0 1481446788007efe0a673d0366284026adfa17b36fedSimon Hosie * void *in, // r1 1482446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t w, // r2 1483446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t h, // r3 1484446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t p, // [sp] 1485446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t x, // [sp,#4] 1486446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t y, // [sp,#8] 1487446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t count, // [sp,#12] 1488446788007efe0a673d0366284026adfa17b36fedSimon Hosie * size_t r, // [sp,#16] 1489446788007efe0a673d0366284026adfa17b36fedSimon Hosie * uint16_t *tab); // [sp,#20] 1490446788007efe0a673d0366284026adfa17b36fedSimon Hosie 
*/ 1491446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(rsdIntrinsicBlurU4_K) 1492446788007efe0a673d0366284026adfa17b36fedSimon Hosie push {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} 1493446788007efe0a673d0366284026adfa17b36fedSimon Hosie vpush {d8-d15} 1494446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r5, [sp,#120] 1495446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r8, [sp,#108] 1496446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r6, [sp,#112] 1497446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r9, r2, r8 1498446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r7, r3, r6 1499446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r2, [sp,#104] 1500446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r3, [sp,#116] 1501446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r9, r9, r3 1502446788007efe0a673d0366284026adfa17b36fedSimon Hosie sub r7, r7, #1 1503446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1504446788007efe0a673d0366284026adfa17b36fedSimon Hosie ldr r12, [sp,#124] 1505446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1506446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r0, r0, r8, LSL #2 1507446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r1, r1, r8, LSL #2 1508446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1509446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r6, r5 1510446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r6, r5 1511446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r7, r5 1512446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r7, r5 1513446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r8, r5 1514446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r8, r5 1515446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r9, r5 1516446788007efe0a673d0366284026adfa17b36fedSimon Hosie movhi r9, r5 1517446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1518446788007efe0a673d0366284026adfa17b36fedSimon Hosie mov r3, r3, LSL #2 
1519446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r4, r8, r9 1520446788007efe0a673d0366284026adfa17b36fedSimon Hosie add r4, r3, r4, LSL #2 1521446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1522446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d0,d1,d2,d3}, [r12]! 1523446788007efe0a673d0366284026adfa17b36fedSimon Hosie vld1.u16 {d4,d5,d6}, [r12]! 1524446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1525446788007efe0a673d0366284026adfa17b36fedSimon Hosie adr lr, 1f 1526446788007efe0a673d0366284026adfa17b36fedSimon Hosie .irep r, TUNED_LIST4 1527446788007efe0a673d0366284026adfa17b36fedSimon Hosie cmp r5, #\r 1528446788007efe0a673d0366284026adfa17b36fedSimon Hosie bls convolve4_\r 1529446788007efe0a673d0366284026adfa17b36fedSimon Hosie .endr 1530446788007efe0a673d0366284026adfa17b36fedSimon Hosie b convolve4_25 1531446788007efe0a673d0366284026adfa17b36fedSimon Hosie 1532446788007efe0a673d0366284026adfa17b36fedSimon Hosie1: vpop {d8-d15} 1533446788007efe0a673d0366284026adfa17b36fedSimon Hosie pop {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} 1534446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(rsdIntrinsicBlurU4_K) 1535