/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Function-definition helpers (expanded by the C preprocessor).
 *
 *   ENTRY(f)   -- start function f in .text, exported with .globl.
 *   PRIVATE(f) -- as ENTRY but without .globl, so f is not exported.
 *   END(f)     -- close the unwind region opened by .fnstart and record the
 *                 size of f.
 *
 * On ARM `.align 4` is a power-of-two alignment, i.e. 16 bytes.
 *
 * The symbol type is written `%function` rather than `#function`: inside a
 * function-like preprocessor macro `#` is the stringify operator and must be
 * followed by a macro parameter, so the `#` spelling only works with
 * preprocessors that relax that rule for assembler input.  Both spellings
 * mean the same thing to the assembler.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,%function; f: .fnstart
#define PRIVATE(f) .text; .align 4; .type f,%function; f: .fnstart
#define END(f) .fnend; .size f, .-f;

.eabi_attribute 25,1 @Tag_ABI_align8_preserved
/* Assemble as ARM (not Thumb) instructions.  The pc-relative jump tables in
 * this file rely on pc reading as the current instruction address + 8.
 */
.arm

/* Number of fractional bits to preserve in intermediate results.  The
 * intermediate storage is 16-bit, and we started with 8 bit data (the integer
 * part), so this should be between 0 and 8.
 */
.set FRACTION_BITS, 7

/* Default value of the max_r parameter of the fetch macro: the largest
 * convolution radius for which unrolled taps are generated.
 */
.set MAX_R, 25


/* A quick way of making a line of code conditional on some other condition.
 * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with
 * `ifcc`:
 *
 * The symbol cc is tested each time ifcc is expanded, so its value at the
 * point of use decides whether the rest of the line is assembled.  Everything
 * following `ifcc` on the line is passed through unchanged.
 */
.macro ifcc zzz:vararg
.if cc
            \zzz
.endif
.endm

/* Fetch 16 columns of bytes (regardless of image format), convolve these
 * vertically, and leave them in the register file.  If working near the top or
 * bottom of an image then clamp the addressing while loading the data in.
 *
 * The convolution is fully unrolled for windows up to max_r, with the
 * outermost edges calculated first.  This way it's possible to branch directly
 * into the relevant part of the code for an arbitrary convolution radius.  Two
 * variants of the loop are produced; one eliminates the clamping code for a
 * slight speed advantage.
 *
 * Where the macro is called with reg=x, the specified register is taken to
 * contain a pre-calculated pointer into one of the two loops.
 *
 * Macro parameters:
 *      max_r   -- largest radius to generate taps for (default MAX_R)
 *      labelc  -- label placed at the end of the clamped loop
 *      labelnc -- label placed at the end of the non-clamped loop
 *      reg     -- register holding the loop entry address; with the default
 *                 (r12) the address is computed here from r5
 *
 * Layout the entry calculation depends on: each tap of the clamped loop is
 * exactly 16 instructions (64 bytes), which is why the entry address is
 * "end of clamped loop - (r5 << 6)" and why that variant carries a nop.  Each
 * tap of the non-clamped loop is 12 instructions, so an entry address into it
 * has to be supplied by the caller through reg.  Do not add or remove
 * instructions inside a tap without revisiting both.
 *
 * Input:
 *      r1 -- src
 *      r2 -- pitch
 *      r5 -- r
 *      r6 -- rup
 *      r7 -- rdn
 *      r12 -- switch index
 *      q0-q3 -- coefficient table
 * Output:
 *      r1 += 16
 *      q10,q11 -- 16 convolved columns
 * Modifies:
 *      r10 = upper row pointer
 *      r11 = lower row pointer
 *      q12-q15 = temporary sums
 *      reg (when it is the default r12)
 *      condition flags (clamped loop only)
 */
.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=r12 /*{{{*/
  /* Default reg: enable the ifcc lines that compute the entry address.
   * Any other reg: the caller has already loaded it, so emit none of them.
   */
  .ifc \reg,r12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif

            vld1.8      {d30,d31}, [r1]         // 16 bytes of the centre row
            mls         r10, r2, r6, r1         // r10 = src - pitch * rup

            vmovl.u8    q14, d30                // widen centre row to 16 bits
            pld         [r1, #32]
            vmovl.u8    q15, d31
  /* Point reg at local label 1, the end of the clamped loop.  For larger
   * max_r the address is built from a literal offset plus pc instead of adr.
   * NOTE(review): presumably because the label can then be out of adr range.
   */
  .if \max_r < 16 // approximate
    ifcc    adr         \reg, 1f
  .else
    ifcc    ldr         \reg, 2f
1:  ifcc    add         \reg, \reg, pc
  .endif

  /* Centre tap: q12..q15 = centre row * coefficient 0, four columns each.
   * q14 and q15 are overwritten only after their old halves have been read.
   * Interleaved with it: step reg back by r taps of 64 bytes, and form the
   * lower row pointer before r1 is advanced.
   */
            vmull.u16   q12, d28, d0[0]
    ifcc    sub         \reg, r5, LSL #6
            vmull.u16   q13, d29, d0[0]
            mla         r11, r2, r7, r1         // r11 = src + pitch * rdn
            vmull.u16   q14, d30, d0[0]
            add         r1, r1, #16
            vmull.u16   q15, d31, d0[0]
            bx          \reg                    // enter the unrolled taps

  /* Literal for the ldr/add pair above: distance from the pc value seen by
   * the add at label 1 (its own address + 8) to the end of the clamped loop.
   */
     ifcc   .align 2
  2: ifcc   .word       1f-1b-8

  /* Emit the unrolled taps twice: first with row clamping (cc = 1), then
   * without (cc = 0).  Tap i uses coefficient lane i of the table, i.e.
   * d(i / 4)[i % 4], and taps run from max_r down to 1.
   */
  .irp rowclamp, 1, 0
    .set cc, \rowclamp
    .align 4
    .irp dreg, 6, 5, 4, 3, 2, 1, 0 ; .irp lane, 3, 2, 1, 0
      .set i, \dreg * 4 + \lane
      .if 0 < i && i <= \max_r
        .if \rowclamp
            /* Clamped: a pointer only moves towards the centre row once
             * the tap distance i is within the rows available (rup / rdn);
             * until then the same edge row is re-read.
             */
            vld1.8      {d20,d21}, [r10]
            vld1.8      {d22,d23}, [r11]
            cmp         r6, #i
        .else
            /* Not clamped: both pointers always step one row inwards. */
            vld1.8      {d20,d21}, [r10], r2
            vld1.8      {d22,d23}, [r11]
            sub         r11, r11, r2
        .endif
            /* Pair upper and lower rows: q10 = columns 0-7, q11 = 8-15,
             * then add each pair (widening to 16 bits) because both rows
             * share the same coefficient.
             */
            vswp        d21, d22
            pld         [r10, #32]
            vaddl.u8    q10, d20, d21
    ifcc    addhs       r10, r10, r2
            vaddl.u8    q11, d22, d23
    ifcc    cmp         r7, #i
            vmlal.u16   q12, d20, d\dreg[\lane]
            pld         [r11, #32]
            vmlal.u16   q13, d21, d\dreg[\lane]
    ifcc    subhs       r11, r11, r2
            vmlal.u16   q14, d22, d\dreg[\lane]
            /* The nop pads the clamped tap to 16 instructions (64 bytes). */
    ifcc    nop
            vmlal.u16   q15, d23, d\dreg[\lane]
        .endif
    .endr ; .endr
    .if \rowclamp == 1
        /* End of the clamped loop: skip over the non-clamped copy. */
        1: \labelc :
            b           2f
    .else
        2: \labelnc :
    .endif
  .endr

  /* Narrow the 32-bit sums to 16 bits with rounding and saturation, keeping
   * FRACTION_BITS fractional bits.  Result: q10 = columns 0-7, q11 = 8-15.
   */
            vqrshrn.u32 d20, q12, #16 - FRACTION_BITS
            vqrshrn.u32 d21, q13, #16 - FRACTION_BITS
            vqrshrn.u32 d22, q14, #16 - FRACTION_BITS
            vqrshrn.u32 d23, q15, #16 - FRACTION_BITS
.endm /*}}}*/

/* Some portion of the convolution window (as much as will fit, and all of it
 * for the uchar1 cases) is kept in the register file to avoid unnecessary
 * memory accesses.  This forces the horizontal loops to be unrolled because
 * there's no indexed addressing into the register file.
 *
 * As in the fetch macro, the operations are ordered from outside to inside, so
 * that jumping into the middle of the block bypasses the unwanted window taps.
 *
 * There are several variants of the macro because of the fixed offsets of the
 * taps -- the wider the maximum radius the further the centre tap is from the
 * most recently fetched data.  This means that pre-filling the window requires
 * more data that won't be used and it means that rotating the window involves
 * more mov operations.
 *
 * When the buffer gets too big the buffer at [r9] is used.
 *
 * Input:
 *      q4-q11 -- convolution window
 *      r9 -- pointer to additional convolution window data
 * Output:
 *      r9 -- updated buffer pointer (if used)
 *      d31 -- result to be stored
 * Modifies:
 *      r12 -- temp buffer pointer
 *      q12-q13 -- temporaries for load and vext operations.
 *      q14-q15 -- intermediate sums
 */
/* NOTE(review): presumably the maximum radii that have a dedicated hconv1_<r>
 * variant (hconv1_8 and hconv1_16 below); the place where this list is
 * consumed is not visible here -- confirm before changing it.
 */
#define TUNED_LIST1 8, 16
/* hconv1_8: horizontal convolution of eight columns for radius 1..8
 * (NOTE(review): presumably the one-channel, uchar1, case).
 *
 * The window is q8:q9:q10 as 16-bit values, with q9 the centre.  The centre
 * tap is applied first, then execution jumps to label 10<r5>, so only the
 * taps at distance r5 down to 1 are accumulated.  At distance k the left and
 * right neighbours are extracted with vext (or read directly when k is a
 * multiple of 4) and both are multiplied by coefficient lane k of the table.
 *
 * Jump table: pc reads as the instruction address + 8, so the ldr indexes
 * from the bkpt and r5 = 1 picks the first word; the add then sees
 * pc = label 100, which the stored offsets are relative to.  The bkpt is
 * padding that is never executed.  r5 is assumed to be in 1..8 -- r5 = 0
 * would use the bkpt encoding as an offset.
 *
 * Result: eight bytes in d31.  Afterwards the window is moved along by one
 * q register (q8 <- q9 <- q10 <- q11).
 */
.macro hconv1_8/*{{{*/
            // centre tap: q14 = columns 0-3, q15 = columns 4-7
            vmull.u16   q14, d18, d0[0]
            vmull.u16   q15, d19, d0[0]

            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            // distance 8: whole registers q8 and q10, no vext needed
    108:    vmlal.u16   q14, d16, d2[0]
            vmlal.u16   q15, d17, d2[0]
            vmlal.u16   q14, d20, d2[0]
            vmlal.u16   q15, d21, d2[0]
            // distance 7
    107:    vext.u16    q12, q8, q9, #1
            vext.u16    q13, q9, q10, #7
            vmlal.u16   q14, d24, d1[3]
            vmlal.u16   q15, d25, d1[3]
            vmlal.u16   q14, d26, d1[3]
            vmlal.u16   q15, d27, d1[3]
            // distance 6
    106:    vext.u16    q12, q8, q9, #2
            vext.u16    q13, q9, q10, #6
            vmlal.u16   q14, d24, d1[2]
            vmlal.u16   q15, d25, d1[2]
            vmlal.u16   q14, d26, d1[2]
            vmlal.u16   q15, d27, d1[2]
            // distance 5
    105:    vext.u16    q12, q8, q9, #3
            vext.u16    q13, q9, q10, #5
            vmlal.u16   q14, d24, d1[1]
            vmlal.u16   q15, d25, d1[1]
            vmlal.u16   q14, d26, d1[1]
            vmlal.u16   q15, d27, d1[1]
            // distance 4: the shifted windows are d17:d18 and d19:d20 as-is
    104:    //vext.u16    q12, q8, q9, #4
            //vext.u16    q13, q9, q10, #4
            vmlal.u16   q14, d17, d1[0]
            vmlal.u16   q15, d18, d1[0]
            vmlal.u16   q14, d19, d1[0]
            vmlal.u16   q15, d20, d1[0]
            // distance 3
    103:    vext.u16    q12, q8, q9, #5
            vext.u16    q13, q9, q10, #3
            vmlal.u16   q14, d24, d0[3]
            vmlal.u16   q15, d25, d0[3]
            vmlal.u16   q14, d26, d0[3]
            vmlal.u16   q15, d27, d0[3]
            // distance 2
    102:    vext.u16    q12, q8, q9, #6
            vext.u16    q13, q9, q10, #2
            vmlal.u16   q14, d24, d0[2]
            vmlal.u16   q15, d25, d0[2]
            vmlal.u16   q14, d26, d0[2]
            vmlal.u16   q15, d27, d0[2]
            // distance 1
    101:    vext.u16    q12, q8, q9, #7
            vext.u16    q13, q9, q10, #1
            vmlal.u16   q14, d24, d0[1]
            vmlal.u16   q15, d25, d0[1]
            vmlal.u16   q14, d26, d0[1]
            vmlal.u16   q15, d27, d0[1]

            // 32-bit sums -> 16 bits (into q14), then drop the
            // FRACTION_BITS kept by the vertical pass to get bytes in d31
            vqrshrn.u32 d28, q14, #16
            vqrshrn.u32 d29, q15, #16
            vqrshrn.u16 d31, q14, #FRACTION_BITS

            // advance the window; q11 holds the next eight columns
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/

/* hconv1_16: horizontal convolution of eight columns for radius 1..16
 * (NOTE(review): presumably the one-channel, uchar1, case).
 *
 * The window is q6..q10 as 16-bit values, with q8 the centre.  The centre tap
 * is applied first, then execution jumps to label 1<r5 as two digits>, so
 * only the taps at distance r5 down to 1 are accumulated.  At distance k the
 * left and right neighbours are extracted with vext (or read directly when k
 * is a multiple of 4) and both are multiplied by coefficient lane k of the
 * table.
 *
 * Jump table: pc reads as the instruction address + 8, so the ldr indexes
 * from the bkpt and r5 = 1 picks the first word; the add then sees
 * pc = label 100, which the stored offsets are relative to.  The bkpt is
 * padding that is never executed.  r5 is assumed to be in 1..16 -- r5 = 0
 * would use the bkpt encoding as an offset.
 *
 * Result: eight bytes in d31.  Afterwards the window is moved along by one
 * q register (q6 <- q7 <- q8 <- q9 <- q10 <- q11).
 */
.macro hconv1_16/*{{{*/
            // centre tap: q14 = columns 0-3, q15 = columns 4-7
            vmull.u16   q14, d16, d0[0]
            vmull.u16   q15, d17, d0[0]

            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b
            .word 102f-100b
            .word 103f-100b
            .word 104f-100b
            .word 105f-100b
            .word 106f-100b
            .word 107f-100b
            .word 108f-100b
            .word 109f-100b
            .word 110f-100b
            .word 111f-100b
            .word 112f-100b
            .word 113f-100b
            .word 114f-100b
            .word 115f-100b
            .word 116f-100b
            // distance 16: whole registers q6 and q10, no vext needed
    116:    //vext.u16    q12, q6, q7, #0
            //vext.u16    q13, q10, q11, #0
            vmlal.u16   q14, d12, d4[0]
            vmlal.u16   q15, d13, d4[0]
            vmlal.u16   q14, d20, d4[0]
            vmlal.u16   q15, d21, d4[0]
            // distance 15
    115:    vext.u16    q12, q6, q7, #1
            vext.u16    q13, q9, q10, #7
            vmlal.u16   q14, d24, d3[3]
            vmlal.u16   q15, d25, d3[3]
            vmlal.u16   q14, d26, d3[3]
            vmlal.u16   q15, d27, d3[3]
            // distance 14
    114:    vext.u16    q12, q6, q7, #2
            vext.u16    q13, q9, q10, #6
            vmlal.u16   q14, d24, d3[2]
            vmlal.u16   q15, d25, d3[2]
            vmlal.u16   q14, d26, d3[2]
            vmlal.u16   q15, d27, d3[2]
            // distance 13
    113:    vext.u16    q12, q6, q7, #3
            vext.u16    q13, q9, q10, #5
            vmlal.u16   q14, d24, d3[1]
            vmlal.u16   q15, d25, d3[1]
            vmlal.u16   q14, d26, d3[1]
            vmlal.u16   q15, d27, d3[1]
            // distance 12: the shifted windows are d13:d14 and d19:d20 as-is
    112:    //vext.u16    q12, q6, q7, #4
            //vext.u16    q13, q9, q10, #4
            vmlal.u16   q14, d13, d3[0]
            vmlal.u16   q15, d14, d3[0]
            vmlal.u16   q14, d19, d3[0]
            vmlal.u16   q15, d20, d3[0]
            // distance 11
    111:    vext.u16    q12, q6, q7, #5
            vext.u16    q13, q9, q10, #3
            vmlal.u16   q14, d24, d2[3]
            vmlal.u16   q15, d25, d2[3]
            vmlal.u16   q14, d26, d2[3]
            vmlal.u16   q15, d27, d2[3]
            // distance 10
    110:    vext.u16    q12, q6, q7, #6
            vext.u16    q13, q9, q10, #2
            vmlal.u16   q14, d24, d2[2]
            vmlal.u16   q15, d25, d2[2]
            vmlal.u16   q14, d26, d2[2]
            vmlal.u16   q15, d27, d2[2]
            // distance 9
    109:    vext.u16    q12, q6, q7, #7
            vext.u16    q13, q9, q10, #1
            vmlal.u16   q14, d24, d2[1]
            vmlal.u16   q15, d25, d2[1]
            vmlal.u16   q14, d26, d2[1]
            vmlal.u16   q15, d27, d2[1]
            // distance 8: whole registers q7 and q9, no vext needed
    108:    //vext.u16    q12, q7, q8, #0
            //vext.u16    q13, q9, q10, #0
            vmlal.u16   q14, d14, d2[0]
            vmlal.u16   q15, d15, d2[0]
            vmlal.u16   q14, d18, d2[0]
            vmlal.u16   q15, d19, d2[0]
            // distance 7
    107:    vext.u16    q12, q7, q8, #1
            vext.u16    q13, q8, q9, #7
            vmlal.u16   q14, d24, d1[3]
            vmlal.u16   q15, d25, d1[3]
            vmlal.u16   q14, d26, d1[3]
            vmlal.u16   q15, d27, d1[3]
            // distance 6
    106:    vext.u16    q12, q7, q8, #2
            vext.u16    q13, q8, q9, #6
            vmlal.u16   q14, d24, d1[2]
            vmlal.u16   q15, d25, d1[2]
            vmlal.u16   q14, d26, d1[2]
            vmlal.u16   q15, d27, d1[2]
            // distance 5
    105:    vext.u16    q12, q7, q8, #3
            vext.u16    q13, q8, q9, #5
            vmlal.u16   q14, d24, d1[1]
            vmlal.u16   q15, d25, d1[1]
            vmlal.u16   q14, d26, d1[1]
            vmlal.u16   q15, d27, d1[1]
            // distance 4: the shifted windows are d15:d16 and d17:d18 as-is
    104:    //vext.u16    q12, q7, q8, #4
            //vext.u16    q13, q8, q9, #4
            vmlal.u16   q14, d15, d1[0]
            vmlal.u16   q15, d16, d1[0]
            vmlal.u16   q14, d17, d1[0]
            vmlal.u16   q15, d18, d1[0]
            // distance 3
    103:    vext.u16    q12, q7, q8, #5
            vext.u16    q13, q8, q9, #3
            vmlal.u16   q14, d24, d0[3]
            vmlal.u16   q15, d25, d0[3]
            vmlal.u16   q14, d26, d0[3]
            vmlal.u16   q15, d27, d0[3]
            // distance 2
    102:    vext.u16    q12, q7, q8, #6
            vext.u16    q13, q8, q9, #2
            vmlal.u16   q14, d24, d0[2]
            vmlal.u16   q15, d25, d0[2]
            vmlal.u16   q14, d26, d0[2]
            vmlal.u16   q15, d27, d0[2]
            // distance 1
    101:    vext.u16    q12, q7, q8, #7
            vext.u16    q13, q8, q9, #1
            vmlal.u16   q14, d24, d0[1]
            vmlal.u16   q15, d25, d0[1]
            vmlal.u16   q14, d26, d0[1]
            vmlal.u16   q15, d27, d0[1]

            // 32-bit sums -> 16 bits (into q14), then drop the
            // FRACTION_BITS kept by the vertical pass to get bytes in d31
            vqrshrn.u32 d28, q14, #16
            vqrshrn.u32 d29, q15, #16
            vqrshrn.u16 d31, q14, #FRACTION_BITS

            // advance the window; q11 holds the next eight columns
            vmov        q6, q7
            vmov        q7, q8
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/

/* One symmetric tap pair for hconv1_25.
 *
 *   q12 = eight u16 lanes taken from \lo_a:\lo_b, starting at lane \lo_sh
 *   q13 = eight u16 lanes taken from \hi_a:\hi_b, starting at lane \hi_sh
 *
 * Both vectors are multiplied by the scalar \coeff and added into the 32-bit
 * accumulators: lanes 0-3 into q14, lanes 4-7 into q15.
 * Clobbers q12 and q13.
 */
.macro hconv1_25_tap lo_a, lo_b, lo_sh, hi_a, hi_b, hi_sh, coeff
            vext.u16    q12, \lo_a, \lo_b, #\lo_sh
            vext.u16    q13, \hi_a, \hi_b, #\hi_sh
            vmlal.u16   q14, d24, \coeff
            vmlal.u16   q15, d25, \coeff
            vmlal.u16   q14, d26, \coeff
            vmlal.u16   q15, d27, \coeff
.endm

/* hconv1_25: one step of a symmetric filter whose taps are one u16 lane
 * apart, with a centre tap and up to 25 tap pairs.  (Name presumably stands
 * for "horizontal convolution, 1 channel, radius <= 25" -- inferred from the
 * identifier only, confirm against the caller.)
 *
 * Reads:
 *   r5          number of tap pairs to apply; indexes the offset table at
 *               100:, which has entries for 1..25 only
 *   d0[0]       centre coefficient
 *   d0[1]..d6[1]  coefficients for tap pairs 1..25
 *   d7, q4-q11  u16 sample window; the eight centre samples start at lane 7
 *               of q6.  q3 is only read at lanes 6 and 7 (d7[2], d7[3]),
 *               because d6 holds coefficients.
 * Writes:
 *   d31         q14/q15 narrowed to u16 with a rounding, saturating shift by
 *               16, then narrowed again to u8 with a rounding, saturating
 *               shift by FRACTION_BITS
 *   d7, q4-q10  sample window moved down by eight lanes; q11 is not written
 * Clobbers r12 and q12-q15 (d31 carries the result).
 */
.macro hconv1_25/*{{{*/
            /* Centre tap starts the accumulators (vmull, not vmlal). */
            vext.u16    q12, q6, q7, #7
            vmull.u16   q14, d24, d0[0]
            vmull.u16   q15, d25, d0[0]

            /* Computed jump.  In ARM state pc reads as the current
             * instruction address + 8, so the ldr is based on the bkpt and
             * the add is based on label 100.  r5 = N therefore fetches table
             * word N-1 and lands on label 100+N; execution then falls through
             * every lower-numbered tap down to 101.  The bkpt fills the word
             * that r5 = 0 would select.
             */
            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b, 102f-100b, 103f-100b, 104f-100b, 105f-100b
            .word 106f-100b, 107f-100b, 108f-100b, 109f-100b, 110f-100b
            .word 111f-100b, 112f-100b, 113f-100b, 114f-100b, 115f-100b
            .word 116f-100b, 117f-100b, 118f-100b, 119f-100b, 120f-100b
            .word 121f-100b, 122f-100b, 123f-100b, 124f-100b, 125f-100b

            /* Outermost tap first.  Left-hand source walks from q3:q4 up to
             * q6:q7, right-hand source walks from q10:q11 down to q7:q8.
             */
    125:    hconv1_25_tap q3, q4, 6,   q10, q11, 0,   d6[1]
    124:    hconv1_25_tap q3, q4, 7,   q9,  q10, 7,   d6[0]
    123:    hconv1_25_tap q4, q5, 0,   q9,  q10, 6,   d5[3]
    122:    hconv1_25_tap q4, q5, 1,   q9,  q10, 5,   d5[2]
    121:    hconv1_25_tap q4, q5, 2,   q9,  q10, 4,   d5[1]
    120:    hconv1_25_tap q4, q5, 3,   q9,  q10, 3,   d5[0]
    119:    hconv1_25_tap q4, q5, 4,   q9,  q10, 2,   d4[3]
    118:    hconv1_25_tap q4, q5, 5,   q9,  q10, 1,   d4[2]
    117:    hconv1_25_tap q4, q5, 6,   q9,  q10, 0,   d4[1]
    116:    hconv1_25_tap q4, q5, 7,   q8,  q9,  7,   d4[0]
    115:    hconv1_25_tap q5, q6, 0,   q8,  q9,  6,   d3[3]
    114:    hconv1_25_tap q5, q6, 1,   q8,  q9,  5,   d3[2]
    113:    hconv1_25_tap q5, q6, 2,   q8,  q9,  4,   d3[1]
    112:    hconv1_25_tap q5, q6, 3,   q8,  q9,  3,   d3[0]
    111:    hconv1_25_tap q5, q6, 4,   q8,  q9,  2,   d2[3]
    110:    hconv1_25_tap q5, q6, 5,   q8,  q9,  1,   d2[2]
    109:    hconv1_25_tap q5, q6, 6,   q8,  q9,  0,   d2[1]
    108:    hconv1_25_tap q5, q6, 7,   q7,  q8,  7,   d2[0]
    107:    hconv1_25_tap q6, q7, 0,   q7,  q8,  6,   d1[3]
    106:    hconv1_25_tap q6, q7, 1,   q7,  q8,  5,   d1[2]
    105:    hconv1_25_tap q6, q7, 2,   q7,  q8,  4,   d1[1]
    104:    hconv1_25_tap q6, q7, 3,   q7,  q8,  3,   d1[0]
    103:    hconv1_25_tap q6, q7, 4,   q7,  q8,  2,   d0[3]
    102:    hconv1_25_tap q6, q7, 5,   q7,  q8,  1,   d0[2]
    101:    hconv1_25_tap q6, q7, 6,   q7,  q8,  0,   d0[1]

            /* 32-bit sums -> u16 in q14 (d28:d29) -> u8 in d31. */
            vqrshrn.u32 d28, q14, #16
            vqrshrn.u32 d29, q15, #16
            vqrshrn.u16 d31, q14, #FRACTION_BITS

            /* Slide the sample window down by eight lanes.  Order matters:
             * each register is overwritten only after it has been copied to
             * its lower neighbour.  Only d9 of the old q4 survives (into d7),
             * since d6 must keep its coefficients.
             */
            vmov        d7, d9
            vmov        q4, q5
            vmov        q5, q6
            vmov        q6, q7
            vmov        q7, q8
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/
571446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Tap counts that have a dedicated hconv4_* macro (hconv4_6 and hconv4_12).
 * Presumably expanded by a dispatcher elsewhere in the file -- confirm at the
 * point of use.
 */
#define TUNED_LIST4 6, 12

/* One symmetric tap pair for hconv4_6.
 *
 * The left-hand sample pair (\left_lo, \left_hi) and the right-hand sample
 * pair (\right_lo, \right_hi) are each multiplied by the scalar \coeff and
 * added into the 32-bit accumulators: the *_lo registers into q14, the *_hi
 * registers into q15.
 */
.macro hconv4_6_tap left_lo, left_hi, right_lo, right_hi, coeff
            vmlal.u16   q14, \left_lo,  \coeff
            vmlal.u16   q15, \left_hi,  \coeff
            vmlal.u16   q14, \right_lo, \coeff
            vmlal.u16   q15, \right_hi, \coeff
.endm

/* hconv4_6: one step of a symmetric filter whose taps are four u16 lanes (one
 * d register) apart, with a centre tap and up to 6 tap pairs.  (Name
 * presumably stands for "horizontal convolution, 4 channels, radius <= 6" --
 * inferred from the identifier only, confirm against the caller.)
 *
 * Reads:
 *   r5          number of tap pairs to apply; indexes the offset table at
 *               100:, which has entries for 1..6 only
 *   d0[0]       centre coefficient
 *   d0[1]..d1[2]  coefficients for tap pairs 1..6
 *   q4-q11      u16 sample window; q7 (d14:d15) holds the centre samples
 * Writes:
 *   d31         q14/q15 narrowed to u16 with a rounding, saturating shift by
 *               16, then narrowed again to u8 with a rounding, saturating
 *               shift by FRACTION_BITS
 *   q4-q10      sample window moved down by one q register; q11 is not
 *               written
 * Clobbers r12, q14 and q15 (d31 carries the result).
 */
.macro hconv4_6/*{{{*/
            /* Centre tap starts the accumulators (vmull, not vmlal). */
            vmull.u16   q14, d14, d0[0]
            vmull.u16   q15, d15, d0[0]

            /* Computed jump.  In ARM state pc reads as the current
             * instruction address + 8, so the ldr is based on the bkpt and
             * the add is based on label 100.  r5 = N therefore fetches table
             * word N-1 and lands on label 100+N; execution then falls through
             * every lower-numbered tap down to 101.  The bkpt fills the word
             * that r5 = 0 would select.
             */
            ldr         r12, [pc, r5, LSL #2]
            add         pc, pc, r12
            bkpt
    100:    .word 101f-100b, 102f-100b, 103f-100b
            .word 104f-100b, 105f-100b, 106f-100b

            /* Outermost tap first; tap N uses d(14-N):d(15-N) on the left and
             * d(14+N):d(15+N) on the right.
             */
    106:    hconv4_6_tap d8,  d9,    d20, d21,   d1[2]
    105:    hconv4_6_tap d9,  d10,   d19, d20,   d1[1]
    104:    hconv4_6_tap d10, d11,   d18, d19,   d1[0]
    103:    hconv4_6_tap d11, d12,   d17, d18,   d0[3]
    102:    hconv4_6_tap d12, d13,   d16, d17,   d0[2]
    101:    hconv4_6_tap d13, d14,   d15, d16,   d0[1]

            /* 32-bit sums -> u16 in q14 (d28:d29) -> u8 in d31. */
            vqrshrn.u32 d28, q14, #16
            vqrshrn.u32 d29, q15, #16
            vqrshrn.u16 d31, q14, #FRACTION_BITS

            /* Slide the sample window down by one q register.  Order matters:
             * each register is overwritten only after it has been copied to
             * its lower neighbour.
             */
            vmov        q4, q5
            vmov        q5, q6
            vmov        q6, q7
            vmov        q7, q8
            vmov        q8, q9
            vmov        q9, q10
            vmov        q10, q11
.endm/*}}}*/
623446788007efe0a673d0366284026adfa17b36fedSimon Hosie
624446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_12/*{{{*/
625446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d8, d0[0]
626446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d9, d0[0]
627446788007efe0a673d0366284026adfa17b36fedSimon Hosie
628446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [pc, r5, LSL #2]
629446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         pc, pc, r12
630446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bkpt
631446788007efe0a673d0366284026adfa17b36fedSimon Hosie    100:    .word 101f-100b
632446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 102f-100b
633446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 103f-100b
634446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 104f-100b
635446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 105f-100b
636446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 106f-100b
637446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 107f-100b
638446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 108f-100b
639446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 109f-100b
640446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 110f-100b
641446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 111f-100b
642446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 112f-100b
643446788007efe0a673d0366284026adfa17b36fedSimon Hosie    112:    add         r12, r9, #0x1a0
644446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
645446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
646446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[0]
647446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[0]
648446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d20, d3[0]
649446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d21, d3[0]
650446788007efe0a673d0366284026adfa17b36fedSimon Hosie    111:    add         r12, r9, #0x1a8
651446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
652446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
653446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
654446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
655446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[3]
656446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[3]
657446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d19, d2[3]
658446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d20, d2[3]
659446788007efe0a673d0366284026adfa17b36fedSimon Hosie    110:    add         r12, r9, #0x1b0
660446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
661446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
662446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[2]
663446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[2]
664446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d18, d2[2]
665446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d19, d2[2]
666446788007efe0a673d0366284026adfa17b36fedSimon Hosie    109:    add         r12, r9, #0x1b8
667446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
668446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
669446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
670446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
671446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[1]
672446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[1]
673446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d17, d2[1]
674446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d18, d2[1]
675446788007efe0a673d0366284026adfa17b36fedSimon Hosie    108:    add         r12, r9, #0x1c0
676446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
677446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
678446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[0]
679446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[0]
680446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d16, d2[0]
681446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d17, d2[0]
682446788007efe0a673d0366284026adfa17b36fedSimon Hosie    107:    add         r12, r9, #0x1c8
683446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
684446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
685446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
686446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
687446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[3]
688446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[3]
689446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d15, d1[3]
690446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d16, d1[3]
691446788007efe0a673d0366284026adfa17b36fedSimon Hosie    106:    add         r12, r9, #0x1d0
692446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
693446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
694446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[2]
695446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[2]
696446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d14, d1[2]
697446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d15, d1[2]
698446788007efe0a673d0366284026adfa17b36fedSimon Hosie    105:    add         r12, r9, #0x1d8
699446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
700446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
701446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
702446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
703446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[1]
704446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[1]
705446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d13, d1[1]
706446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d14, d1[1]
707446788007efe0a673d0366284026adfa17b36fedSimon Hosie    104:    add         r12, r9, #0x1e0
708446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
709446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
710446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[0]
711446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[0]
712446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d12, d1[0]
713446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d13, d1[0]
714446788007efe0a673d0366284026adfa17b36fedSimon Hosie    103:    add         r12, r9, #0x1e8
715446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
716446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
717446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
718446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
719446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[3]
720446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[3]
721446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d11, d0[3]
722446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d12, d0[3]
723446788007efe0a673d0366284026adfa17b36fedSimon Hosie    102:    add         r12, r9, #0x1f0
724446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
725446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
726446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[2]
727446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[2]
728446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d10, d0[2]
729446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d11, d0[2]
730446788007efe0a673d0366284026adfa17b36fedSimon Hosie    101:    add         r12, r9, #0x1f8
731446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
732446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]
733446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[1]
734446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d8,  d0[1]
735446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d9,  d0[1]
736446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d10, d0[1]
737446788007efe0a673d0366284026adfa17b36fedSimon Hosie
738446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d28, q14, #16
739446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d29, q15, #16
740446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u16 d31, q14, #FRACTION_BITS
741446788007efe0a673d0366284026adfa17b36fedSimon Hosie
742446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {q4}, [r9:128]!
743446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r9, r9, #0x200
744446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q4, q5
745446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q5, q6
746446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q6, q7
747446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q7, q8
748446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q8, q9
749446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q9, q10
750446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
751446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/
752446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* hconv4_25: accumulate a symmetric horizontal convolution of up to 25 taps
 * per side for two adjacent outputs, then narrow the result to 8 bits.
 * Each d register holds four 16-bit lanes (the "4" in the name presumably
 * means four channels per pixel -- confirm against the callers).
 *
 * Inputs, as read by the code below:
 *   r5        taps per side, 1..25.  It indexes the jump table so execution
 *             enters at label (100 + r5) and falls through down to 101.
 *             r5 == 0 would fetch the bkpt encoding as a branch offset, so
 *             it is not a usable value here.
 *   r9        16-byte aligned pointer (it is stored through with :128).
 *             Every address derived from it has bit 9 cleared after the
 *             offset is added, which folds offsets back into one 512-byte
 *             window as long as r9 itself has bit 9 clear.  This looks like
 *             a 512-byte circular buffer -- confirm against the caller.
 *   d0..d6    16-bit coefficients: d0[0] is the centre tap, and tap k uses
 *             lane (k & 3) of d(k >> 2), i.e. d0[1] for tap 1 up to d6[1]
 *             for tap 25.
 *   d8..d21   (q4..q10) right-hand samples for taps 12..25, taken straight
 *             from registers instead of memory.
 *   q11       only read by the final register shuffle.
 *
 * Outputs and side effects:
 *   d31       eight 8-bit results (two outputs of four lanes each).
 *   [r9]      q4 is stored there, then r9 advances by 16 with bit 9 cleared.
 *   q4..q10   each takes the value of the next register up (q10 <- q11);
 *             q11 itself is left unchanged.
 *   Clobbers  r12, d24..d27, q14, q15.
 *
 * Memory layout implied by the offsets: the centre sample for the first
 * output is at r9 + 0x198 and each tap moves by 8 bytes (one d register),
 * so tap k reads r9 + 0x198 - 8k on the left and r9 + 0x198 + 8k on the
 * right.  Right-hand offsets of 0x200 and above are the ones supplied from
 * d8..d21.
 *
 * NOTE(review): in the even taps 102..124 the second left-hand load
 * (vld1.u16 {d25}, [r12]) omits the :64 alignment qualifier that the
 * matching d27 loads and the centre-tap load use.  The address is the
 * post-incremented r12 of a :64 load, so the qualifier looks applicable
 * there as well -- confirm before changing.
 */
753446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_25/*{{{*/
            /* Centre tap: vmull initialises both 32-bit accumulators. */
754446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r12, r9, #0x198
755446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
756446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
757446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
758446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
759446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d24, d0[0]
760446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d25, d0[0]
761446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Computed jump.  In ARM state pc reads as the address of the
             * current instruction plus 8, so the ldr indexes from the bkpt
             * word (r5 == 1 selects the first .word) and the add sees pc
             * equal to label 100, the base the table offsets are relative
             * to.  The add always branches, so bkpt is not reached by
             * fall-through; it occupies the r5 == 0 slot.
             */
762446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [pc, r5, LSL #2]
763446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         pc, pc, r12
764446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bkpt
765446788007efe0a673d0366284026adfa17b36fedSimon Hosie    100:    .word 101f-100b
766446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 102f-100b
767446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 103f-100b
768446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 104f-100b
769446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 105f-100b
770446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 106f-100b
771446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 107f-100b
772446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 108f-100b
773446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 109f-100b
774446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 110f-100b
775446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 111f-100b
776446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 112f-100b
777446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 113f-100b
778446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 114f-100b
779446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 115f-100b
780446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 116f-100b
781446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 117f-100b
782446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 118f-100b
783446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 119f-100b
784446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 120f-100b
785446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 121f-100b
786446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 122f-100b
787446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 123f-100b
788446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 124f-100b
789446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 125f-100b
            /* Taps 25..13: left-hand samples are loaded into d24/d25, the
             * right-hand samples come from d8..d21.  Odd taps load an
             * aligned register pair in one go; even taps start 8 bytes off
             * a 16-byte boundary and load the two halves separately,
             * clearing bit 9 again after the post-increment.
             */
790446788007efe0a673d0366284026adfa17b36fedSimon Hosie    125:    add         r12, r9, #0x0d0
791446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
792446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
793446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d6[1]
794446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d6[1]
795446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d20, d6[1]
796446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d21, d6[1]
797446788007efe0a673d0366284026adfa17b36fedSimon Hosie    124:    add         r12, r9, #0x0d8
798446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
799446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
800446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
801446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
802446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d6[0]
803446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d6[0]
804446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d19, d6[0]
805446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d20, d6[0]
806446788007efe0a673d0366284026adfa17b36fedSimon Hosie    123:    add         r12, r9, #0x0e0
807446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
808446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
809446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[3]
810446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[3]
811446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d18, d5[3]
812446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d19, d5[3]
813446788007efe0a673d0366284026adfa17b36fedSimon Hosie    122:    add         r12, r9, #0x0e8
814446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
815446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
816446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
817446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
818446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[2]
819446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[2]
820446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d17, d5[2]
821446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d18, d5[2]
822446788007efe0a673d0366284026adfa17b36fedSimon Hosie    121:    add         r12, r9, #0x0f0
823446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
824446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
825446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[1]
826446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[1]
827446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d16, d5[1]
828446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d17, d5[1]
829446788007efe0a673d0366284026adfa17b36fedSimon Hosie    120:    add         r12, r9, #0x0f8
830446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
831446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
832446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
833446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
834446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[0]
835446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[0]
836446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d15, d5[0]
837446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d16, d5[0]
838446788007efe0a673d0366284026adfa17b36fedSimon Hosie    119:    add         r12, r9, #0x100
839446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
840446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
841446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[3]
842446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[3]
843446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d14, d4[3]
844446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d15, d4[3]
845446788007efe0a673d0366284026adfa17b36fedSimon Hosie    118:    add         r12, r9, #0x108
846446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
847446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
848446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
849446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
850446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[2]
851446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[2]
852446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d13, d4[2]
853446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d14, d4[2]
854446788007efe0a673d0366284026adfa17b36fedSimon Hosie    117:    add         r12, r9, #0x110
855446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
856446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
857446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[1]
858446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[1]
859446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d12, d4[1]
860446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d13, d4[1]
861446788007efe0a673d0366284026adfa17b36fedSimon Hosie    116:    add         r12, r9, #0x118
862446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
863446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
864446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
865446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
866446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[0]
867446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[0]
868446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d11, d4[0]
869446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d12, d4[0]
870446788007efe0a673d0366284026adfa17b36fedSimon Hosie    115:    add         r12, r9, #0x120
871446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
872446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
873446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[3]
874446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[3]
875446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d10, d3[3]
876446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d11, d3[3]
877446788007efe0a673d0366284026adfa17b36fedSimon Hosie    114:    add         r12, r9, #0x128
878446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
879446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
880446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
881446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
882446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[2]
883446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[2]
884446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d9,  d3[2]
885446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d10, d3[2]
886446788007efe0a673d0366284026adfa17b36fedSimon Hosie    113:    add         r12, r9, #0x130
887446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
888446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
889446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[1]
890446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[1]
891446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d8,  d3[1]
892446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d9,  d3[1]
            /* Tap 12 straddles the boundary: the first right-hand sample is
             * loaded from r9 + 0x1f8 into d26, the second is d8.  From tap 11
             * down both right-hand samples are loaded from memory into
             * d26/d27 (the loads in the indented right-hand column).
             */
893446788007efe0a673d0366284026adfa17b36fedSimon Hosie    112:    add         r12, r9, #0x138
894446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
895446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
896446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
897446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
898446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1f8
899446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
900446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]
901446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[0]
902446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[0]
903446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[0]   @ Could be d7, without the load, right?
904446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d8,  d3[0]
905446788007efe0a673d0366284026adfa17b36fedSimon Hosie    111:    add         r12, r9, #0x140
906446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
907446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
908446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1f0
909446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
910446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
911446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[3]
912446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[3]
913446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[3]
914446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[3]
915446788007efe0a673d0366284026adfa17b36fedSimon Hosie    110:    add         r12, r9, #0x148
916446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
917446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
918446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
919446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
920446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1e8
921446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
922446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
923446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
924446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
925446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[2]
926446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[2]
927446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[2]
928446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[2]
929446788007efe0a673d0366284026adfa17b36fedSimon Hosie    109:    add         r12, r9, #0x150
930446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
931446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
932446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1e0
933446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
934446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
935446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[1]
936446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[1]
937446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[1]
938446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[1]
939446788007efe0a673d0366284026adfa17b36fedSimon Hosie    108:    add         r12, r9, #0x158
940446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
941446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
942446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
943446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
944446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1d8
945446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
946446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
947446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
948446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
949446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[0]
950446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[0]
951446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[0]
952446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[0]
953446788007efe0a673d0366284026adfa17b36fedSimon Hosie    107:    add         r12, r9, #0x160
954446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
955446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
956446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1d0
957446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
958446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
959446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[3]
960446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[3]
961446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[3]
962446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[3]
963446788007efe0a673d0366284026adfa17b36fedSimon Hosie    106:    add         r12, r9, #0x168
964446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
965446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
966446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
967446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
968446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1c8
969446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
970446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
971446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
972446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
973446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[2]
974446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[2]
975446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[2]
976446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[2]
977446788007efe0a673d0366284026adfa17b36fedSimon Hosie    105:    add         r12, r9, #0x170
978446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
979446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
980446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1c0
981446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
982446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
983446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[1]
984446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[1]
985446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[1]
986446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[1]
987446788007efe0a673d0366284026adfa17b36fedSimon Hosie    104:    add         r12, r9, #0x178
988446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
989446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
990446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
991446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
992446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1b8
993446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
994446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
995446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
996446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
997446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[0]
998446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[0]
999446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[0]
1000446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[0]
1001446788007efe0a673d0366284026adfa17b36fedSimon Hosie    103:    add         r12, r9, #0x180
1002446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1003446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
1004446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1b0
1005446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
1006446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
1007446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[3]
1008446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[3]
1009446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[3]
1010446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[3]
1011446788007efe0a673d0366284026adfa17b36fedSimon Hosie    102:    add         r12, r9, #0x188
1012446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1013446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
1014446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1015446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
1016446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1a8
1017446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
1018446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
1019446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
1020446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
1021446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[2]
1022446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[2]
1023446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[2]
1024446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[2]
            /* Tap 1: the left pair at 0x190 and the right pair at 0x1a0 are
             * adjacent, so one post-incremented pointer serves both loads.
             */
1025446788007efe0a673d0366284026adfa17b36fedSimon Hosie    101:    add         r12, r9, #0x190
1026446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1027446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]!
1028446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1029446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d26,d27}, [r12:128]
1030446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[1]
1031446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[1]
1032446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[1]
1033446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[1]
1034446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Narrow with rounding and saturation: 32 -> 16 bits by a shift
             * of 16, written back into the two halves of q14 (d28, d29),
             * then 16 -> 8 bits by FRACTION_BITS (defined outside this
             * macro) into d31.
             */
1035446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d28, q14, #16
1036446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d29, q15, #16
1037446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u16 d31, q14, #FRACTION_BITS
1038446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Write q4 out at r9, step r9 on by 16 bytes with bit 9 cleared,
             * and move q5..q11 down one register each.
             */
1039446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {q4}, [r9:128]!
1040446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r9, r9, #0x200
1041446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q4, q5
1042446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q5, q6
1043446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q6, q7
1044446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q7, q8
1045446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q8, q9
1046446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q9, q10
1047446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
1048446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/
1049446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1050446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Dedicated function wrapper for the fetch macro, for the cases where
1051446788007efe0a673d0366284026adfa17b36fedSimon Hosie * performance isn't that important, to keep code size down.
1052446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
10535eb463f2c28cf161fe31b8078908a7b01198516fSimon HosiePRIVATE(fetch_generic_asm)
            /* Out-of-line copy of the fetch macro (expanded with its default
             * arguments), entered with bl and left with bx lr.
             * r10 and r11 are saved and restored around the expansion, so
             * callers can keep values live in them across the call.
             * NOTE(review): which registers fetch itself reads and writes is
             * decided by the fetch macro, which is defined outside this chunk.
             */
1054446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r10,r11}
1055446788007efe0a673d0366284026adfa17b36fedSimon Hosie            fetch
1056446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r10,r11}
1057446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          lr
1058446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(fetch_generic_asm)
1059446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1060446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Given values in q10 and q11, and an index in r11, sweep the (r11&15)th value
1061446788007efe0a673d0366284026adfa17b36fedSimon Hosie * across to fill the rest of the register pair.  Used for filling the right
1062446788007efe0a673d0366284026adfa17b36fedSimon Hosie * hand edge of the window when starting too close to the right hand edge of
1063446788007efe0a673d0366284026adfa17b36fedSimon Hosie * the image.
10645eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie * Also returns a dup-ed copy of the last element in q12 for the tail-fill
10655eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie * case (this happens incidentally in common path, but must be done
10665eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie * deliberately in the fast-out path).
1067446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
10685eb463f2c28cf161fe31b8078908a7b01198516fSimon HosiePRIVATE(prefetch_clampright1)
            /* In:   q10,q11 = 16 u16 values, r11 = stop index.
             * Out:  q10,q11 with every lane from (r11 & 15) - 1 onward replaced
             *       by the value of lane (r11 & 15) - 1; q12 = that value in
             *       all eight lanes.
             * Uses: r12, flags, 64 bytes of stack scratch (released on return).
             * When (r11 & 15) == 0 the pair is left untouched and q12 is the
             * splat of its last lane.
             */
            /* r12 = number of valid lanes in the pair; ands sets Z for the beq. */
10695eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            ands        r12, r11, #15
1070446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
            /* r12 = lane index of the last valid value. */
10715eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         r12, r12, #1
            /* 64 bytes: 32 for the q10/q11 image plus slack, because the
             * 32-byte splat written below can start as late as byte 28.
             */
10725eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         sp, sp, #64
10735eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q10,q11}, [sp]
            /* r12 = address of the last valid u16 inside the spilled image. */
10745eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            add         r12, sp, r12, LSL #1
            /* Load that u16 into every lane of d24 and d25, i.e. all of q12. */
10755eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vld1.u16    {d24[]}, [r12]
10765eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vld1.u16    {d25[]}, [r12]
            /* Overwrite 32 bytes starting at the last valid lane with the
             * splat (the first store post-increments r12 by 16).
             */
10775eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q12}, [r12]!
10785eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q12}, [r12]
            /* Reload the padded image; only its first 32 bytes are used. */
10795eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vld1.u16    {q10,q11}, [sp]
10805eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            add         sp, sp, #64
10815eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            bx          lr
            /* Fast-out path: nothing to pad, only produce q12 from the last
             * lane of q11.
             */
10825eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie1:          vdup.u16    q12, d23[3]
1083446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          lr
10845eb463f2c28cf161fe31b8078908a7b01198516fSimon HosieEND(prefetch_clampright1)
1085446788007efe0a673d0366284026adfa17b36fedSimon Hosie
10865eb463f2c28cf161fe31b8078908a7b01198516fSimon HosiePRIVATE(prefetch_clampright4)
            /* Four-element-group variant of prefetch_clampright1.
             * In:   q10,q11 = 16 u16 values, r11 = stop index.
             * Out:  q10,q11 with everything from lane (r11 & 15) - 4 onward
             *       replaced by repeats of the four u16 values found there;
             *       q12 = that group in both halves.
             * Uses: r12, flags, 64 bytes of stack scratch (released on return).
             * When (r11 & 15) == 0 the pair is left untouched and q12 is built
             * from d23, the last group of q11.
             * NOTE(review): if (r11 & 15) were 1..3 the computed address would
             * fall below sp; this presumably relies on r11 being a multiple
             * of 4 here -- confirm against the callers.
             */
10875eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            ands        r12, r11, #15
1088446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
            /* r12 = lane index where the last valid group starts. */
10895eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         r12, r12, #4
10905eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         sp, sp, #64
10915eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q10,q11}, [sp]
10925eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            add         r12, sp, r12, LSL #1
            /* Load the same 8 bytes into both halves of q12. */
10935eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vld1.u64    {d24}, [r12]
10945eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vld1.u64    {d25}, [r12]
            /* Write four copies of the group (32 bytes) over the tail of the
             * image, then reload the padded pair.
             */
10955eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q12}, [r12]!
10965eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q12}, [r12]
10975eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vld1.u16    {q10,q11}, [sp]
10985eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            add         sp, sp, #64
10995eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            bx          lr
            /* Fast-out path: q12 = d23 in both halves, q10/q11 unchanged. */
11005eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie1:          vmov.u16    d24, d23
11015eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vmov.u16    d25, d23
1102446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          lr
11035eb463f2c28cf161fe31b8078908a7b01198516fSimon HosieEND(prefetch_clampright4)
1104446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1105446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1106446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Helpers for prefetch, below.
1107446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1108446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch_out qa, qb, store, qsa, qsb, qsb_hi
            /* Emit one slot (two q registers' worth) of window data taken
             * from qsa/qsb.  The destination is chosen at assembly time:
             *   store > 0   write qsa then qsb to the buffer at r9, leaving
             *               r9 advanced by 32 bytes;
             *   store == 0  copy qsa into qa and qsb into qb;
             *   store < 0   copy only qsb_hi into qb (qa and qsa are unused).
             * No flags are changed by any of the three forms.
             */
1109446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \store > 0
            /* A vst1 register list cannot name the same register twice, so a
             * repeated source is written with two single-register stores.
             */
1110446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .ifc \qsa,\qsb
1111446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u16    {\qsa}, [r9:128]!
1112446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u16    {\qsb}, [r9:128]!
1113446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .else
1114446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u16    {\qsa,\qsb}, [r9:256]!
1115446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .endif
1116446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .elseif \store == 0
1117446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    \qa, \qsa
1118446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    \qb, \qsb
1119446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1120446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    \qb, \qsb_hi
1121446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1122446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
1123446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1124446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch_one  qa, qb, rem, c, store=0, step=1
            /* Fill one 16-element slot of the window.  Expansions of this
             * macro are meant to be placed back to back with decreasing rem;
             * they chain to each other through the numeric labels 1, 2 and 4.
             *
             *   qa, qb  destination registers, forwarded to prefetch_out
             *   rem     distance of this slot from the end of the window
             *   c       not referenced in the body
             *   store   destination selector, forwarded to prefetch_out
             *   step    selects prefetch_clampright1 or prefetch_clampright4
             *
             * Requires the assembler symbol need to be set beforehand (the
             * prefetch macro does this); redefines the symbol i.
             * Reads r10 (fill start index) and r11 (fill stop index), the left
             * pad in q9, the incoming data in q10/q11 and the right pad in q12.
             * Uses bl, so lr must already be saved by the enclosing function.
             *
             * Entry points of one expansion:
             *   1:    nothing has been emitted yet that contains input data
             *   2:    q10/q11 hold data that belongs in this slot
             *   4:+4  the right edge has been passed; emit padding from q12
             */
            /* i = index of this slot's first element; nothing is emitted for
             * slots that would start before index 0.
             */
1125446788007efe0a673d0366284026adfa17b36fedSimon Hosie.set i, (need - 16) - \rem
1126446788007efe0a673d0366284026adfa17b36fedSimon Hosie.if i >= 0
            /* Fill starts at or beyond the end of this slot: emit left
             * padding and enter the next expansion at its label 1.
             */
1127446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          cmp         r10, #i+16
1128446788007efe0a673d0366284026adfa17b36fedSimon Hosie            blo         2f
1129446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_out \qa, \qb, \store, q9, q9, d19
1130446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           1f
            /* Fill stops beyond this slot: emit q10/q11 as they are, fetch
             * the following data and enter the next expansion at its label 2.
             */
1131446788007efe0a673d0366284026adfa17b36fedSimon Hosie2:          cmp         r11, #i+16
1132446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bls         3f
1133446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_out \qa, \qb, \store, q10, q11, d23
1134446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bl          fetch_generic_asm
1135446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           2f
            /* Fill stops inside (or exactly at the end of) this slot: pad
             * the tail of q10/q11, emit it, and switch to the padding chain.
             */
11365eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie3:          bl          prefetch_clampright\step
1137446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_out \qa, \qb, \store, q10, q11, d23
            /* 4f is the next label 4 in the file; +4 skips the single 4-byte
             * ARM branch sitting at that label, landing on the padding output
             * of the next expansion (or just past the final nop).  This only
             * works while label 4 is followed by exactly one ARM instruction.
             */
1138446788007efe0a673d0366284026adfa17b36fedSimon Hosie4:          b           4f+4
11395eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            @q12 contains pad word from prefetch_clampright call
1140446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_out \qa, \qb, \store, q12, q12, d25
1141446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \rem > 0
            /* More slots follow: keep padding them. */
1142446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           4f+4
1143446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
            /* Last slot: give every chained branch from this expansion a
             * landing place.
             */
1144446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:
1145446788007efe0a673d0366284026adfa17b36fedSimon Hosie2:
1146446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:
1147446788007efe0a673d0366284026adfa17b36fedSimon Hosie4:          nop
1148446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1149446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endif
1150446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
1151446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1152446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Fill the convolution window with context data.  The aim here is to load
1153446788007efe0a673d0366284026adfa17b36fedSimon Hosie * exactly rlf + rrt columns, and in the main loop to read as many columns as
1154446788007efe0a673d0366284026adfa17b36fedSimon Hosie * will be written.  This is complicated by the need to handle cases when the
1155446788007efe0a673d0366284026adfa17b36fedSimon Hosie * input starts very close to the left or right (or both) edges of the image,
1156446788007efe0a673d0366284026adfa17b36fedSimon Hosie * and where these do not fall on 16-byte boundaries.
1157446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
1158446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input:
1159446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r1 -- src
1160446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r2 -- pitch
1161446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r3 -- count
1162446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r4 -- inlen
1163446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r5 -- r
1164446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r6 -- rup
1165446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r7 -- rdn
1166446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r8 -- rlf
1167446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r9 -- buffer (if needed)
1168446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Output:
1169446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r1 += rlf + min(count, rrt)
1170446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies:
1171446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r10 -- fill start index in the window
1172446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r11 -- fill stop index in the window
1173446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r12 -- scratch
1174446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1175446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch step=1, max_r=25
            /* need = window length in elements: 2 * max_r * step rounded up
             * to a multiple of 16.
             */
1176446788007efe0a673d0366284026adfa17b36fedSimon Hosie.set need, ((\max_r + \max_r) * \step + 15) & ~15
            /* r10 = need - max_r * step - rlf (scaled): index in the window
             * where real input starts.  The non-1 path scales rlf with LSL #2,
             * so it matches step only when step == 4.
             */
1177446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step == 1
1178446788007efe0a673d0366284026adfa17b36fedSimon Hosie            rsb         r10, r8, #need - (\max_r * \step)
1179446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1180446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mov         r10, r8, LSL #2
1181446788007efe0a673d0366284026adfa17b36fedSimon Hosie            rsb         r10, r10, #need - (\max_r * \step)
1182446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* r11 = min(r10 + inlen, need), compared as unsigned. */
1183446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r11, r10, r4
1184446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r11, #need
1185446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r11, #need
1186446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* First fetch; the code below treats q10/q11 as its result. */
1187446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bl          fetch_generic_asm
            /* q9 = left padding: the first value splatted (step 1) or the
             * first four-value group copied into both halves.
             */
1188446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step == 1
1189446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q9, d20[0]
1190446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1191446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    d18, d20
1192446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    d19, d20
1193446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* If the start index is not a multiple of 16, slide q10/q11
             * right by (r10 & 15) elements, filling from the left with q9.
             * This is done through memory: q10/q11 are spilled, two copies of
             * q9 are placed directly below them, and the pair is reloaded
             * from (r10 & 15) * 2 bytes below the spilled image.
             */
11945eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            ands        r12, r10, #15
1195446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         2f
11965eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         sp, sp, #32
11975eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q10,q11}, [sp]
11985eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         r12, sp, r12, LSL #1
11995eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         sp, sp, #16
12005eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q9}, [sp]
12015eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         sp, sp, #16
12025eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vst1.u16    {q9}, [sp]
12035eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            vld1.u16    {q10,q11}, [r12]
            /* Release all 64 bytes of scratch in one step. */
12045eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            add         sp, sp, #64
            /* Net effect: r1 -= (r10 & 15) and r10 is rounded down to a
             * multiple of 16, so the slide is accounted for in both the source
             * pointer and the start index.
             */
12055eb463f2c28cf161fe31b8078908a7b01198516fSimon Hosie            sub         r1, r1, r10
1206446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r10, r10, #15
1207446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r1, r1, r10
1208446788007efe0a673d0366284026adfa17b36fedSimon Hosie2:
            /* One prefetch_one per 16-element slot, from the far end of the
             * window (largest rem) to the near end; slots that do not exist
             * for this need expand to nothing.  The xx arguments are
             * placeholders that the chosen store mode never expands.
             */
1209446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step > 1
1210446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* it's only in the uchar2 and uchar4 cases where the register file
1211446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * is insufficient (given MAX_R <= 25).
1212446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
1213446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 192, c=\max_r, step=\step, store=1
1214446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 176, c=\max_r, step=\step, store=1
1215446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 160, c=\max_r, step=\step, store=1
1216446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 144, c=\max_r, step=\step, store=1
1217446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 128, c=\max_r, step=\step, store=1
1218446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 112, c=\max_r, step=\step, store=1
1219446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx,  96, c=\max_r, step=\step, store=1
1220446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx,  80, c=\max_r, step=\step, store=1
1221446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx,  64, c=\max_r, step=\step, store=1
1222446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx,  48, c=\max_r, step=\step, store=1
1223446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1224446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* q3 normally contains the coefficient table, but it's not fully
1225446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * used.  In the uchar1, r=25 case the other half of q3 is used for
1226446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * the last two window taps to avoid falling out to memory.
1227446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
1228446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, d7,  48, c=\max_r, step=\step, store=-1
1229446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* The three nearest slots live in registers q4..q9. */
1230446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one q4, q5,  32, c=\max_r, step=\step, store=0
1231446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one q6, q7,  16, c=\max_r, step=\step, store=0
1232446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one q8, q9,   0, c=\max_r, step=\step, store=0
1233446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* r4 = max(r4 - (rlf (scaled) + max_r * step), 0) using an
             * unsigned subtract; r10 is reused as scratch from here on.
             */
1234446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step == 1
1235446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r10, r8, #\max_r * \step
1236446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1237446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mov         r10, r8, LSL #2
1238446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r10, r10, #\max_r * \step
1239446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1240446788007efe0a673d0366284026adfa17b36fedSimon Hosie            subs        r4, r4, r10
1241446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movlo       r4, #0
1242446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
1243446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1244446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* The main loop.
1245446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
1246446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input:
1247446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r0 = dst
1248446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r1 = src
1249446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r2 = pitch
1250446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r3 = count
1251446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r4 = inlen
1252446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r5 = r
1253446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r6 = rup
1254446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r7 = rdn
1255446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r9 = buffer
1256446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies
1257446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r8 = fetch code pointer
1258446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1259446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro mainloop core, step=1, max_r=25, labelc="", labelnc=""
            /* Phase 1: choose the fetch entry point and keep it in r8.
             * The literal at 3f holds labelnc relative to the pc value read
             * by the add at label 1 (that instruction's address + 8 in ARM
             * state), so after the add r8 == labelnc.  r8 then becomes
             * labelnc - 48 * r5 (r5 * 32 plus r5 * 16).
             */
1260446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r8, 3f
1261446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          add         r8, r8, pc
1262446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r8, r5, LSL #5
1263446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r8, r5, LSL #4
            /* cmpeq only executes when r5 == r6, so Z is set at the beq only
             * if r == rup and r == rdn; in that case the pointer above is
             * kept and the loop is entered at its test (label 5).
             */
1264446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r5, r6
1265446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmpeq       r5, r7
1266446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         5f
1267446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1268446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* if (r != rup || r != rdn) then the address-clamping table should
1269446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * be used rather than the short-cut version.
1270446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
            /* Same construction with the second literal: r8 = labelc - 64 * r5. */
1271446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r8, 3f+4
1272446788007efe0a673d0366284026adfa17b36fedSimon Hosie2:          add         r8, r8, pc
1273446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r8, r5, LSL #6
1274446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           5f
            /* Two-word literal pool.  It is never executed: the code above
             * always branches to 5f, and the loop re-enters at the second
             * label 3 below.
             */
1275446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .align 3
1276446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:          .word       \labelnc-1b-8
1277446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word       \labelc-2b-8
1278446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .align 4
            /* Phase 2: steady-state loop, 16 outputs per iteration.
             * NOTE(review): fetch is defined outside this chunk; reg=r8
             * presumably makes it use the pointer computed above.
             */
1279446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:          fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=r8
1280446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1281446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* For each call to fetch two are made to \core.  It would be
1282446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * preferable to have twice the work done in \core, but the
1283446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * register file is too small for this to be straightforward.
1284446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
            /* Each core expansion is followed by a store of the 8 bytes in
             * d31 to dst.
             */
1285446788007efe0a673d0366284026adfa17b36fedSimon Hosie            \core
1286446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {d31}, [r0]!
1287446788007efe0a673d0366284026adfa17b36fedSimon Hosie            \core
1288446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {d31}, [r0]!
1289446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1290446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r3, r3, #16
            /* Loop while at least 16 input columns remain (no borrow). */
1291446788007efe0a673d0366284026adfa17b36fedSimon Hosie5:          subs        r4, r4, #16
1292446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bhs         3b
            /* Phase 3: input tail.  Undo the last subtraction; r4 is now the
             * number of columns left (0..15).
             */
1293446788007efe0a673d0366284026adfa17b36fedSimon Hosie            adds        r4, r4, #16
1294446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bne         1f
            /* No input left: fill q10/q11 with padding taken from the end of
             * q9 (last value for step 1, last four-value group otherwise).
             */
1295446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step==1
1296446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q10, d19[3]
1297446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q11, d19[3]
1298446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1299446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d20, d19
1300446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d21, d19
1301446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d22, d19
1302446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d23, d19
1303446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1304446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           4f
1305446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* 1..15 columns left: move src back by 16 - r4 so that one more
             * fetch ends exactly on the last input column.
             */
1306446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          sub         r1, r1, #16
1307446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r1, r1, r4
1308446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bl          fetch_generic_asm
1309446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* q12 = right padding built from the end of q11. */
1310446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step==1
1311446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q12, d23[3]
1312446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1313446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d24, d23
1314446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d25, d23
1315446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* The low four bits of -r4 equal 16 - r4, the number of leading
             * elements in q10/q11 that were already consumed.  Shift them out
             * one power of two at a time (8, 4, 2, 1), pulling padding in
             * from q12.
             */
1316446788007efe0a673d0366284026adfa17b36fedSimon Hosie            rsb         r4, r4, #0
1317446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r4, #8
1318446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1319446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
1320446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q11, q12
1321446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r4, #4
1322446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1323446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q10, q11, #4
1324446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q11, q12, #4
1325446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r4, #2
1326446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1327446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q10, q11, #2
1328446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q11, q12, #2
1329446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r4, #1
1330446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         4f
1331446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q10, q11, #1
1332446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q11, q12, #1
            /* Phase 4: output tail.  Run core until count (r3) is used up,
             * 8 outputs per pass.
             */
1333446788007efe0a673d0366284026adfa17b36fedSimon Hosie4:          cmp         r3, #0
1334446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         5f
1335446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:          \core
            /* Refill q11 (step 1) or d22 (otherwise) from the last
             * value/group of q11 before the next pass.
             */
1336446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step==1
1337446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q11, d23[3]
1338446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1339446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d22, d23
1340446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* Fewer than 8 outputs wanted: go to the partial store.  The vst1
             * does not alter flags, so the beq after it still tests the
             * result of this subs (exactly 8 were wanted: done).
             */
1341446788007efe0a673d0366284026adfa17b36fedSimon Hosie            subs        r3, r3, #8
1342446788007efe0a673d0366284026adfa17b36fedSimon Hosie            blo         4f
1343446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {d31}, [r0]!
1344446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         5f
1345446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           3b
            /* Partial store of 1..7 bytes.  Subtracting 8 left the low three
             * bits of r3 unchanged, so they still encode the remaining count;
             * store 4, 2 and 1 bytes as those bits dictate, rotating d31 so
             * the next piece is always at lane 0.
             */
1346446788007efe0a673d0366284026adfa17b36fedSimon Hosie4:          tst         r3, #4
1347446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1348446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u32    {d31[0]}, [r0]!
1349446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u8     d31, d31, d31, #4
1350446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r3, #2
1351446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1352446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u16    {d31[0]}, [r0]!
1353446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u8     d31, d31, d31, #2
1354446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r3, #1
1355446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         5f
1356446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {d31[0]}, [r0]!
1357446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u8     d31, d31, d31, #1
            /* Common exit; the nop gives label 5 an instruction to land on. */
1358446788007efe0a673d0366284026adfa17b36fedSimon Hosie5:          nop
1359446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
1360446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1361446788007efe0a673d0366284026adfa17b36fedSimon Hosie.irep r, TUNED_LIST1, 25
            /* One file-local convolve1_<r> routine is generated for every
             * entry of TUNED_LIST1 (defined outside this chunk) and for 25.
             * Register inputs are those documented for the prefetch and
             * mainloop macros; the routine returns by popping pc.
             */
13625eb463f2c28cf161fe31b8078908a7b01198516fSimon HosiePRIVATE(convolve1_\r)
            /* lr must be saved because prefetch and mainloop use bl; pushing
             * r12 with it keeps the push at 8 bytes.
             */
1363446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r12,lr}
1364446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Step src back by rlf (r8). */
1365446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, r8
1366446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1367446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch    step=1, max_r=\r
1368446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1369446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mainloop    core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r
1370446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1371446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r12,pc}
1372446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(convolve1_\r)
1373446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endr
1374446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1375446788007efe0a673d0366284026adfa17b36fedSimon Hosie.irep r, TUNED_LIST4, 25
            /* One file-local convolve4_<r> routine is generated for every
             * entry of TUNED_LIST4 (defined outside this chunk) and for 25.
             * Unlike convolve1 it first carves an aligned scratch buffer out
             * of the stack and passes its address in r9.
             */
13765eb463f2c28cf161fe31b8078908a7b01198516fSimon HosiePRIVATE(convolve4_\r)
            /* r12 = sp - 0x200; r9 = r12 with bits 2..9 cleared, so at least
             * 0x200 bytes lie between r9 and the caller's sp.  sp is moved
             * down to r9, and r12 is pushed so the epilogue can undo the
             * variable-sized adjustment.
             */
1377446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r12, sp, #0x200
1378446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r9, r12, #0x3fc
1379446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mov         sp, r9
1380446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r12,lr}
1381446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1382446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* r9 now points to a buffer on the stack whose address has the low
1383446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * 10 bits clear.  This allows easy address calculation in the
1384446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * wrap-around cases.
1385446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
            /* The bic leaves bits 0..1 alone, so the statement above holds
             * only if sp was at least 4-byte aligned on entry.
             */
1386446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Step src back by rlf * 4 (r8 scaled by LSL #2). */
1387446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, r8, LSL #2
1388446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1389446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch    step=4, max_r=\r
1390446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1391446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mainloop    core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r
1392446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Recover the saved r12 and lr, then restore the caller's sp as
             * r12 + 0x200 (r12 was set to sp - 0x200 on entry).
             */
1393446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r12,lr}
1394446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         sp, r12, #0x200
1395446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          lr
1396446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(convolve4_\r)
1397446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endr
1398446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1399446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* void rsdIntrinsicBlurU1_K(
1400446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  void *out,      // r0
1401446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  void *in,       // r1
1402446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t w,       // r2
1403446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t h,       // r3
1404446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t p,       // [sp]
1405446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t x,       // [sp,#4]
1406446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t y,       // [sp,#8]
1407446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t count,   // [sp,#12]
1408446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t r,       // [sp,#16]
1409446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  uint16_t *tab); // [sp,#20]
 *
 * Entry point: unpacks the arguments, limits four edge distances to the
 * radius r, loads 28 halfwords from tab into d0-d6 and then branches to a
 * convolve1_<N> routine chosen by comparing r against TUNED_LIST1.
 *
 * Register state at the branch to convolve1_<N>:
 *   r0  = out                      r1  = in + x
 *   r2  = p                        r3  = count
 *   r4  = r8 + r9 + count          r5  = r
 *   r6  = min(y, r)                r7  = min(h - y - 1, r)
 *   r8  = min(x, r)                r9  = min(w - x - count, r)
 *   r12 = tab + 56 bytes           lr  = address of the epilogue (label 1)
 * (all min() comparisons are unsigned)
 *
 * NOTE(review): the meaning the convolve1_<N> routines give to each of these
 * registers is defined outside this view -- presumably r6/r7 are the usable
 * rows above/below and r8/r9 the usable columns left/right; confirm there.
1410446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1411446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(rsdIntrinsicBlurU1_K)
1412446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} /* 10 words = 40 bytes */
1413446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vpush       {d8-d15} /* 64 bytes; stack arguments now start at [sp,#104] */
1414446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r5, [sp,#120] /* r5 = r */
1415446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r8, [sp,#108] /* r8 = x */
1416446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r6, [sp,#112] /* r6 = y */
1417446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r9, r2, r8 /* r9 = w - x */
1418446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r7, r3, r6 /* r7 = h - y */
1419446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r2, [sp,#104] /* r2 = p (w has been consumed above) */
1420446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r3, [sp,#116] /* r3 = count (h has been consumed above) */
1421446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r9, r9, r3 /* r9 = w - x - count */
1422446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r7, r7, #1 /* r7 = h - y - 1 */
1423446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1424446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [sp,#124] /* r12 = tab */
1425446788007efe0a673d0366284026adfa17b36fedSimon Hosie
14264c8f2477285848ab0a4f33ad854de9398d332e8cJason Sams            add         r1, r1, r8 /* in += x (unscaled, before x is clamped below) */
1427446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1428446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r6, r5
1429446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r6, r5 /* r6 = min(y, r), unsigned */
1430446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r7, r5
1431446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r7, r5 /* r7 = min(h - y - 1, r), unsigned */
1432446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r8, r5
1433446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r8, r5 /* r8 = min(x, r), unsigned */
1434446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r9, r5
1435446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r9, r5 /* r9 = min(w - x - count, r), unsigned */
1436446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1437446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r4, r8, r9
1438446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r4, r4, r3 /* r4 = clamped r8 + clamped r9 + count */
1439446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1440446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d0,d1,d2,d3}, [r12]! /* d0-d3 = tab[0..15]; r12 advances 32 bytes */
1441446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d4,d5,d6}, [r12]! /* d4-d6 = tab[16..27]; r12 advances 24 bytes */
1442446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1443446788007efe0a673d0366284026adfa17b36fedSimon Hosie            adr         lr, 1f /* the routine is entered with a plain branch, so preload lr with the epilogue address */
            /* One cmp/bls pair is emitted per TUNED_LIST1 entry (the list is
             * defined outside this view): take the first convolve1_<N> for
             * which r <= N (unsigned).
             */
1444446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .irep r, TUNED_LIST1
1445446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r5, #\r
1446446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bls         convolve1_\r
1447446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endr
1448446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           convolve1_25 /* no listed value was >= r: use the 25 variant */
1449446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1450446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          vpop        {d8-d15} /* address placed in lr above; presumably reached when convolve1_<N> returns */
1451446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} /* restore saved registers; loading pc returns to the caller */
1452446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(rsdIntrinsicBlurU1_K)
1453446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1454446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* void rsdIntrinsicBlurU4_K(
1455446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  void *out,      // r0
1456446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  void *in,       // r1
1457446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t w,       // r2
1458446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t h,       // r3
1459446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t p,       // [sp]
1460446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t x,       // [sp,#4]
1461446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t y,       // [sp,#8]
1462446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t count,   // [sp,#12]
1463446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t r,       // [sp,#16]
1464446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  uint16_t *tab); // [sp,#20]
 *
 * Entry point: unpacks the arguments, limits four edge distances to the
 * radius r, loads 28 halfwords from tab into d0-d6 and then branches to a
 * convolve4_<N> routine chosen by comparing r against TUNED_LIST4.
 * x and the length in r4 are scaled by 4 (LSL #2) in this variant.
 *
 * Register state at the branch to convolve4_<N>:
 *   r0  = out                      r1  = in + 4*x
 *   r2  = p                        r3  = 4*count
 *   r4  = 4*(r8 + r9 + count)      r5  = r
 *   r6  = min(y, r)                r7  = min(h - y - 1, r)
 *   r8  = min(x, r)                r9  = min(w - x - count, r)
 *   r12 = tab + 56 bytes           lr  = address of the epilogue (label 1)
 * (all min() comparisons are unsigned; r8 and r9 are left unscaled)
 *
 * NOTE(review): the meaning the convolve4_<N> routines give to each of these
 * registers is defined outside this view -- presumably r6/r7 are the usable
 * rows above/below and r8/r9 the usable columns left/right; confirm there.
1465446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1466446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(rsdIntrinsicBlurU4_K)
1467446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} /* 10 words = 40 bytes */
1468446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vpush       {d8-d15} /* 64 bytes; stack arguments now start at [sp,#104] */
1469446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r5, [sp,#120] /* r5 = r */
1470446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r8, [sp,#108] /* r8 = x */
1471446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r6, [sp,#112] /* r6 = y */
1472446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r9, r2, r8 /* r9 = w - x */
1473446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r7, r3, r6 /* r7 = h - y */
1474446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r2, [sp,#104] /* r2 = p (w has been consumed above) */
1475446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r3, [sp,#116] /* r3 = count (h has been consumed above) */
1476446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r9, r9, r3 /* r9 = w - x - count */
1477446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r7, r7, #1 /* r7 = h - y - 1 */
1478446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1479446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [sp,#124] /* r12 = tab */
1480446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1481446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r1, r1, r8, LSL #2 /* in += 4*x (before x is clamped below) */
1482446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1483446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r6, r5
1484446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r6, r5 /* r6 = min(y, r), unsigned */
1485446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r7, r5
1486446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r7, r5 /* r7 = min(h - y - 1, r), unsigned */
1487446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r8, r5
1488446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r8, r5 /* r8 = min(x, r), unsigned */
1489446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r9, r5
1490446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r9, r5 /* r9 = min(w - x - count, r), unsigned */
1491446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1492446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mov         r3, r3, LSL #2 /* r3 = 4*count */
1493446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r4, r8, r9
1494446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r4, r3, r4, LSL #2 /* r4 = 4*count + 4*(clamped r8 + clamped r9) */
1495446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1496446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d0,d1,d2,d3}, [r12]! /* d0-d3 = tab[0..15]; r12 advances 32 bytes */
1497446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d4,d5,d6}, [r12]! /* d4-d6 = tab[16..27]; r12 advances 24 bytes */
1498446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1499446788007efe0a673d0366284026adfa17b36fedSimon Hosie            adr         lr, 1f /* the routine is entered with a plain branch, so preload lr with the epilogue address */
            /* One cmp/bls pair is emitted per TUNED_LIST4 entry (the list is
             * defined outside this view): take the first convolve4_<N> for
             * which r <= N (unsigned).
             */
1500446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .irep r, TUNED_LIST4
1501446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r5, #\r
1502446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bls         convolve4_\r
1503446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endr
1504446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           convolve4_25 /* no listed value was >= r: use the 25 variant */
1505446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1506446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          vpop        {d8-d15} /* address placed in lr above; convolve4_<N> ends with bx lr and so lands here */
1507446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} /* restore saved registers; loading pc returns to the caller */
1508446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(rsdIntrinsicBlurU4_K)
1509