1446788007efe0a673d0366284026adfa17b36fedSimon Hosie/*
2446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Copyright (C) 2014 The Android Open Source Project
3446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
4446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Licensed under the Apache License, Version 2.0 (the "License");
5446788007efe0a673d0366284026adfa17b36fedSimon Hosie * you may not use this file except in compliance with the License.
6446788007efe0a673d0366284026adfa17b36fedSimon Hosie * You may obtain a copy of the License at
7446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
8446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      http://www.apache.org/licenses/LICENSE-2.0
9446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
10446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Unless required by applicable law or agreed to in writing, software
11446788007efe0a673d0366284026adfa17b36fedSimon Hosie * distributed under the License is distributed on an "AS IS" BASIS,
12446788007efe0a673d0366284026adfa17b36fedSimon Hosie * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13446788007efe0a673d0366284026adfa17b36fedSimon Hosie * See the License for the specific language governing permissions and
14446788007efe0a673d0366284026adfa17b36fedSimon Hosie * limitations under the License.
15446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
16446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* ENTRY(f): switch to .text, align, export f as a function symbol, define the
 * label f and open an unwind region with .fnstart.
 * END(f): close the unwind region and record the size of f.
 */
17446788007efe0a673d0366284026adfa17b36fedSimon Hosie#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
18446788007efe0a673d0366284026adfa17b36fedSimon Hosie#define END(f) .fnend; .size f, .-f;
19446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Record build attribute 25 (Tag_ABI_align8_preserved) in the object file and
 * assemble everything that follows as ARM (not Thumb) instructions.
 */
20446788007efe0a673d0366284026adfa17b36fedSimon Hosie.eabi_attribute 25,1 @Tag_ABI_align8_preserved
21446788007efe0a673d0366284026adfa17b36fedSimon Hosie.arm
22446788007efe0a673d0366284026adfa17b36fedSimon Hosie
23446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Number of fractional bits to preserve in intermediate results.  The
24446788007efe0a673d0366284026adfa17b36fedSimon Hosie * intermediate storage is 16-bit, and we started with 8 bit data (the integer
25446788007efe0a673d0366284026adfa17b36fedSimon Hosie * part), so this should be between 0 and 8.
26446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
27446788007efe0a673d0366284026adfa17b36fedSimon Hosie.set FRACTION_BITS, 7
28446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Default for the max_r argument of the fetch macro, which bounds how many
 * vertical taps that macro unrolls.
 */
29446788007efe0a673d0366284026adfa17b36fedSimon Hosie.set MAX_R, 25
30446788007efe0a673d0366284026adfa17b36fedSimon Hosie
31446788007efe0a673d0366284026adfa17b36fedSimon Hosie
32446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* A quick way of making a line of code conditional on some other condition.
33446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Use `.set cc, 1` or `.set cc, 0` to enable or disable lines prefixed with
34446788007efe0a673d0366284026adfa17b36fedSimon Hosie * `ifcc`:
35446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
36446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro ifcc zzz:vararg
/* Emit the wrapped statement only while the assembler symbol cc is non-zero;
 * the whole remainder of the line is passed through unchanged as zzz.
 */
37446788007efe0a673d0366284026adfa17b36fedSimon Hosie.if cc
38446788007efe0a673d0366284026adfa17b36fedSimon Hosie            \zzz
39446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endif
40446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
41446788007efe0a673d0366284026adfa17b36fedSimon Hosie
42446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Fetch 16 columns of bytes (regardless of image format), convolve these
43446788007efe0a673d0366284026adfa17b36fedSimon Hosie * vertically, and leave them in the register file.  If working near the top or
44446788007efe0a673d0366284026adfa17b36fedSimon Hosie * bottom of an image then clamp the addressing while loading the data in.
45446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
46446788007efe0a673d0366284026adfa17b36fedSimon Hosie * The convolution is fully unrolled for windows up to max_r, with the
47446788007efe0a673d0366284026adfa17b36fedSimon Hosie * outermost edges calculated first.  This way it's possible to branch directly
48446788007efe0a673d0366284026adfa17b36fedSimon Hosie * into the relevant part of the code for an arbitrary convolution radius.  Two
49446788007efe0a673d0366284026adfa17b36fedSimon Hosie * variants of the loop are produced; one eliminates the clamping code for a
50446788007efe0a673d0366284026adfa17b36fedSimon Hosie * slight speed advantage.
51446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
52446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Where the macro is called with reg=x, the specified register is taken to
53446788007efe0a673d0366284026adfa17b36fedSimon Hosie * contain a pre-calculated pointer into one of the two loops.
54446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
55446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input:
56446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r1 -- src
57446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r2 -- pitch
58446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r5 -- r
59446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r6 -- rup
60446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r7 -- rdn
61446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r12 -- switch index
62446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      q0-q3 -- coefficient table
63446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Output:
64446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r1 += 16
65446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      q10,q11 -- 16 convolved columns
66446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies:
67446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r10 = upper row pointer
68446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r11 = lower row pointer
69446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      q12-q15 = temporary sums
70446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
71446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro fetch, max_r=MAX_R, labelc=1, labelnc=2, reg=r12 /*{{{*/
/* cc controls the ifcc-prefixed lines below.  It is 1 when reg is r12 (the
 * default), in which case the entry point into the unrolled taps is computed
 * here from r5.  It is 0 when the caller names another register, and that
 * register is then used by the bx as-is.
 */
72446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .ifc \reg,r12 ; .set cc, 1 ; .else ; .set cc, 0 ; .endif
73446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Load 16 bytes of the current row from [r1] and widen them to 16 bits in
 * q14/q15.  Interleaved: r10 = r1 - r2 * r6 (upper row pointer).
 */
74446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.8      {d30,d31}, [r1]
75446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mls         r10, r2, r6, r1
76446788007efe0a673d0366284026adfa17b36fedSimon Hosie
77446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmovl.u8    q14, d30
78446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pld         [r1, #32]
79446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmovl.u8    q15, d31
/* Form the address of forward label 1 (the end of the clamped tap sequence):
 * a plain adr for small max_r, otherwise a pc-relative offset loaded from the
 * literal at label 2 and added to pc.
 * NOTE(review): the literal at label 2 is emitted whenever cc is set, but the
 * backward label 1 it references is only defined in the .else branch here --
 * confirm this assembles as intended when max_r < 16.
 */
80446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \max_r < 16 // approximate
81446788007efe0a673d0366284026adfa17b36fedSimon Hosie    ifcc    adr         \reg, 1f
82446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
83446788007efe0a673d0366284026adfa17b36fedSimon Hosie    ifcc    ldr         \reg, 2f
84446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:  ifcc    add         \reg, \reg, pc
85446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
86446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Seed the four 32-bit accumulators q12-q15 with the current row multiplied
 * by coefficient d0[0].  q14 and q15 are overwritten only after d28/d29 and
 * d30 have been read, so the order of the vmull instructions matters.
 * Interleaved scalar work: step reg back by r5 * 64 bytes (each clamped tap
 * below is 16 four-byte instructions), r11 = r1 + r2 * r7 (lower row
 * pointer), advance r1 by 16, then jump into the unrolled taps.
 */
87446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q12, d28, d0[0]
88446788007efe0a673d0366284026adfa17b36fedSimon Hosie    ifcc    sub         \reg, r5, LSL #6
89446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q13, d29, d0[0]
90446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mla         r11, r2, r7, r1
91446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d30, d0[0]
92446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r1, r1, #16
93446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d31, d0[0]
94446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          \reg
95446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Literal read by the ldr above: distance from the add at label 1 to the end
 * of the clamped taps, less 8 to allow for the pc read-ahead in ARM state.
 * It sits after the bx and is never executed.
 */
96446788007efe0a673d0366284026adfa17b36fedSimon Hosie     ifcc   .align 2
97446788007efe0a673d0366284026adfa17b36fedSimon Hosie  2: ifcc   .word       1f-1b-8
98446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Emit the tap sequence twice: first with row clamping (rowclamp = 1, which
 * also enables the ifcc lines), then without.  Taps are generated from the
 * largest index i = dreg * 4 + lane downwards; indices of 0 or above max_r
 * emit nothing.
 */
99446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .irp rowclamp, 1, 0
100446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .set cc, \rowclamp
101446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .align 4
102446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .irp dreg, 6, 5, 4, 3, 2, 1, 0 ; .irp lane, 3, 2, 1, 0
103446788007efe0a673d0366284026adfa17b36fedSimon Hosie      .set i, \dreg * 4 + \lane
104446788007efe0a673d0366284026adfa17b36fedSimon Hosie      .if 0 < i && i <= \max_r
105446788007efe0a673d0366284026adfa17b36fedSimon Hosie        .if \rowclamp
/* Clamped: the loads do not post-increment.  r10 steps one row down only when
 * r6 >= i and r11 steps one row up only when r7 >= i (unsigned compares, see
 * the addhs/subhs below); otherwise the pointer stays where it is.
 */
106446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.8      {d20,d21}, [r10]
107446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.8      {d22,d23}, [r11]
108446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r6, #i
109446788007efe0a673d0366284026adfa17b36fedSimon Hosie        .else
/* Unclamped: r10 always advances by r2 and r11 always retreats by r2. */
110446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.8      {d20,d21}, [r10], r2
111446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.8      {d22,d23}, [r11]
112446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r11, r11, r2
113446788007efe0a673d0366284026adfa17b36fedSimon Hosie        .endif
/* After the swap q10 holds bytes 0-7 of both rows and q11 holds bytes 8-15 of
 * both rows.  Each vaddl adds the upper and lower row with widening to 16
 * bits, and the vmlal instructions accumulate those sums multiplied by the
 * coefficient for tap i.
 */
114446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vswp        d21, d22
115446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pld         [r10, #32]
116446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vaddl.u8    q10, d20, d21
117446788007efe0a673d0366284026adfa17b36fedSimon Hosie    ifcc    addhs       r10, r10, r2
118446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vaddl.u8    q11, d22, d23
119446788007efe0a673d0366284026adfa17b36fedSimon Hosie    ifcc    cmp         r7, #i
120446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q12, d20, d\dreg[\lane]
121446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pld         [r11, #32]
122446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q13, d21, d\dreg[\lane]
123446788007efe0a673d0366284026adfa17b36fedSimon Hosie    ifcc    subhs       r11, r11, r2
124446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d22, d\dreg[\lane]
/* Padding: brings the clamped tap to 16 instructions so that it matches the
 * LSL #6 stride used above when computing the entry point.
 */
125446788007efe0a673d0366284026adfa17b36fedSimon Hosie    ifcc    nop
126446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d23, d\dreg[\lane]
127446788007efe0a673d0366284026adfa17b36fedSimon Hosie        .endif
128446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .endr ; .endr
/* End-of-sequence labels.  The clamped sequence ends at local label 1 and at
 * labelc, then branches over the unclamped sequence; the unclamped sequence
 * ends at local label 2 and at labelnc and falls through.
 */
129446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .if \rowclamp == 1
130446788007efe0a673d0366284026adfa17b36fedSimon Hosie        1: \labelc :
131446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           2f
132446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .else
133446788007efe0a673d0366284026adfa17b36fedSimon Hosie        2: \labelnc :
134446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .endif
135446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endr
136446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Narrow the 32-bit sums to 16 bits in q10/q11 with rounding and unsigned
 * saturation, shifting right by 16 - FRACTION_BITS.
 */
137446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d20, q12, #16 - FRACTION_BITS
138446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d21, q13, #16 - FRACTION_BITS
139446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d22, q14, #16 - FRACTION_BITS
140446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d23, q15, #16 - FRACTION_BITS
141446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm /*}}}*/
142446788007efe0a673d0366284026adfa17b36fedSimon Hosie
143446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Some portion of the convolution window (as much as will fit, and all of it
144446788007efe0a673d0366284026adfa17b36fedSimon Hosie * for the uchar1 cases) is kept in the register file to avoid unnecessary
145446788007efe0a673d0366284026adfa17b36fedSimon Hosie * memory accesses.  This forces the horizontal loops to be unrolled because
146446788007efe0a673d0366284026adfa17b36fedSimon Hosie * there's no indexed addressing into the register file.
147446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
148446788007efe0a673d0366284026adfa17b36fedSimon Hosie * As in the fetch macro, the operations are ordered from outside to inside, so
149446788007efe0a673d0366284026adfa17b36fedSimon Hosie * that jumping into the middle of the block bypasses the unwanted window taps.
150446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
151446788007efe0a673d0366284026adfa17b36fedSimon Hosie * There are several variants of the macro because of the fixed offsets of the
152446788007efe0a673d0366284026adfa17b36fedSimon Hosie * taps -- the wider the maximum radius the further the centre tap is from the
153446788007efe0a673d0366284026adfa17b36fedSimon Hosie * most recently fetched data.  This means that pre-filling the window requires
154446788007efe0a673d0366284026adfa17b36fedSimon Hosie * more data that won't be used and it means that rotating the window involves
155446788007efe0a673d0366284026adfa17b36fedSimon Hosie * more mov operations.
156446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
157446788007efe0a673d0366284026adfa17b36fedSimon Hosie * When the buffer gets too big the buffer at [r9] is used.
158446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
159446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input:
160446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      q4-q11 -- convolution window
161446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r9 -- pointer to additional convolution window data
162446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Output:
163446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r9 -- updated buffer pointer (if used)
164446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      d31 -- result to be stored
165446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies:
166446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r12 -- temp buffer pointer
167446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      q12-q13 -- temporaries for load and vext operations.
168446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      q14-q15 -- intermediate sums
169446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
/* NOTE(review): 8 and 16 match the suffixes of the hconv1_8 and hconv1_16
 * macros below; presumably this list drives instantiation of those variants,
 * but its use is not visible here -- confirm against the rest of the file.
 */
170446788007efe0a673d0366284026adfa17b36fedSimon Hosie#define TUNED_LIST1 8, 16
171446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv1_8/*{{{*/
/* Horizontal convolution of eight 16-bit values for radii up to 8.  The
 * window lives in q8-q10 with the centre samples in q9 (d18/d19); q11 is
 * shifted in at the end.  Start both accumulators with the centre tap times
 * coefficient d0[0].
 */
172446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d18, d0[0]
173446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d19, d0[0]
174446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Computed jump on r5.  In ARM state pc reads as the instruction address plus
 * 8, so the ldr indexes the table at label 100 with r5 = 1 selecting the
 * first word, and the add sees pc equal to label 100, which is what the
 * table offsets are relative to.  The bkpt only fills the slot between the
 * add and the table.
 * NOTE(review): r5 = 0 would load the bkpt encoding as the offset; presumably
 * callers guarantee 1 <= r5 <= 8 -- confirm.
 */
175446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [pc, r5, LSL #2]
176446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         pc, pc, r12
177446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bkpt
178446788007efe0a673d0366284026adfa17b36fedSimon Hosie    100:    .word 101f-100b
179446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 102f-100b
180446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 103f-100b
181446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 104f-100b
182446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 105f-100b
183446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 106f-100b
184446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 107f-100b
185446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 108f-100b
/* Taps from the outermost inwards; label 10k handles the pair of samples k
 * positions to the left and right of the centre and falls through to the
 * next smaller k.  Taps 8 and 4 line up with whole d registers, so they need
 * no vext; the others extract the shifted vectors into q12 (left) and q13
 * (right).
 */
186446788007efe0a673d0366284026adfa17b36fedSimon Hosie    108:    vmlal.u16   q14, d16, d2[0]
187446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d17, d2[0]
188446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d20, d2[0]
189446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d21, d2[0]
190446788007efe0a673d0366284026adfa17b36fedSimon Hosie    107:    vext.u16    q12, q8, q9, #1
191446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #7
192446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[3]
193446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[3]
194446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[3]
195446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[3]
196446788007efe0a673d0366284026adfa17b36fedSimon Hosie    106:    vext.u16    q12, q8, q9, #2
197446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #6
198446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[2]
199446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[2]
200446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[2]
201446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[2]
202446788007efe0a673d0366284026adfa17b36fedSimon Hosie    105:    vext.u16    q12, q8, q9, #3
203446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #5
204446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[1]
205446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[1]
206446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[1]
207446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[1]
208446788007efe0a673d0366284026adfa17b36fedSimon Hosie    104:    //vext.u16    q12, q8, q9, #4
209446788007efe0a673d0366284026adfa17b36fedSimon Hosie            //vext.u16    q13, q9, q10, #4
210446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d17, d1[0]
211446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d18, d1[0]
212446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d19, d1[0]
213446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d20, d1[0]
214446788007efe0a673d0366284026adfa17b36fedSimon Hosie    103:    vext.u16    q12, q8, q9, #5
215446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #3
216446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[3]
217446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[3]
218446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[3]
219446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[3]
220446788007efe0a673d0366284026adfa17b36fedSimon Hosie    102:    vext.u16    q12, q8, q9, #6
221446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #2
222446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[2]
223446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[2]
224446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[2]
225446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[2]
226446788007efe0a673d0366284026adfa17b36fedSimon Hosie    101:    vext.u16    q12, q8, q9, #7
227446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #1
228446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[1]
229446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[1]
230446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[1]
231446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[1]
232446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Narrow the 32-bit sums to 16 bits (rounding, saturating, shift by 16) into
 * d28/d29, then narrow q14 again by FRACTION_BITS to the eight result bytes
 * in d31.
 */
233446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d28, q14, #16
234446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d29, q15, #16
235446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u16 d31, q14, #FRACTION_BITS
236446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Rotate the window by one q register (eight samples), pulling in q11. */
237446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q8, q9
238446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q9, q10
239446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
240446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/
241446788007efe0a673d0366284026adfa17b36fedSimon Hosie
242446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv1_16/*{{{*/
/* Horizontal convolution of eight 16-bit values for radii up to 16.  The
 * window lives in q6-q10 with the centre samples in q8 (d16/d17); q11 is
 * shifted in at the end.  Start both accumulators with the centre tap times
 * coefficient d0[0].
 */
243446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d16, d0[0]
244446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d17, d0[0]
245446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Computed jump on r5.  In ARM state pc reads as the instruction address plus
 * 8, so the ldr indexes the table at label 100 with r5 = 1 selecting the
 * first word, and the add sees pc equal to label 100, which is what the
 * table offsets are relative to.  The bkpt only fills the slot between the
 * add and the table.
 * NOTE(review): r5 = 0 would load the bkpt encoding as the offset; presumably
 * callers guarantee 1 <= r5 <= 16 -- confirm.
 */
246446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [pc, r5, LSL #2]
247446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         pc, pc, r12
248446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bkpt
249446788007efe0a673d0366284026adfa17b36fedSimon Hosie    100:    .word 101f-100b
250446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 102f-100b
251446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 103f-100b
252446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 104f-100b
253446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 105f-100b
254446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 106f-100b
255446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 107f-100b
256446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 108f-100b
257446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 109f-100b
258446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 110f-100b
259446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 111f-100b
260446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 112f-100b
261446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 113f-100b
262446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 114f-100b
263446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 115f-100b
264446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 116f-100b
/* Taps from the outermost inwards; label 1kk handles the pair of samples kk
 * positions to the left and right of the centre and falls through to the
 * next smaller tap.  Taps 16, 12, 8 and 4 line up with whole d registers, so
 * they need no vext; the others extract the shifted vectors into q12 (left)
 * and q13 (right).
 */
265446788007efe0a673d0366284026adfa17b36fedSimon Hosie    116:    //vext.u16    q12, q6, q7, #0
266446788007efe0a673d0366284026adfa17b36fedSimon Hosie            //vext.u16    q13, q10, q11, #0
267446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d12, d4[0]
268446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d13, d4[0]
269446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d20, d4[0]
270446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d21, d4[0]
271446788007efe0a673d0366284026adfa17b36fedSimon Hosie    115:    vext.u16    q12, q6, q7, #1
272446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #7
273446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[3]
274446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[3]
275446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[3]
276446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d3[3]
277446788007efe0a673d0366284026adfa17b36fedSimon Hosie    114:    vext.u16    q12, q6, q7, #2
278446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #6
279446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[2]
280446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[2]
281446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[2]
282446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d3[2]
283446788007efe0a673d0366284026adfa17b36fedSimon Hosie    113:    vext.u16    q12, q6, q7, #3
284446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #5
285446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[1]
286446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[1]
287446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[1]
288446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d3[1]
289446788007efe0a673d0366284026adfa17b36fedSimon Hosie    112:    //vext.u16    q12, q6, q7, #4
290446788007efe0a673d0366284026adfa17b36fedSimon Hosie            //vext.u16    q13, q9, q10, #4
291446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d13, d3[0]
292446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d14, d3[0]
293446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d19, d3[0]
294446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d20, d3[0]
295446788007efe0a673d0366284026adfa17b36fedSimon Hosie    111:    vext.u16    q12, q6, q7, #5
296446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #3
297446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[3]
298446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[3]
299446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[3]
300446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[3]
301446788007efe0a673d0366284026adfa17b36fedSimon Hosie    110:    vext.u16    q12, q6, q7, #6
302446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #2
303446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[2]
304446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[2]
305446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[2]
306446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[2]
307446788007efe0a673d0366284026adfa17b36fedSimon Hosie    109:    vext.u16    q12, q6, q7, #7
308446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #1
309446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[1]
310446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[1]
311446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[1]
312446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[1]
313446788007efe0a673d0366284026adfa17b36fedSimon Hosie    108:    //vext.u16    q12, q7, q8, #0
314446788007efe0a673d0366284026adfa17b36fedSimon Hosie            //vext.u16    q13, q9, q10, #0
315446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d14, d2[0]
316446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d15, d2[0]
317446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d18, d2[0]
318446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d19, d2[0]
319446788007efe0a673d0366284026adfa17b36fedSimon Hosie    107:    vext.u16    q12, q7, q8, #1
320446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #7
321446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[3]
322446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[3]
323446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[3]
324446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[3]
325446788007efe0a673d0366284026adfa17b36fedSimon Hosie    106:    vext.u16    q12, q7, q8, #2
326446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #6
327446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[2]
328446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[2]
329446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[2]
330446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[2]
331446788007efe0a673d0366284026adfa17b36fedSimon Hosie    105:    vext.u16    q12, q7, q8, #3
332446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #5
333446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[1]
334446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[1]
335446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[1]
336446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[1]
337446788007efe0a673d0366284026adfa17b36fedSimon Hosie    104:    //vext.u16    q12, q7, q8, #4
338446788007efe0a673d0366284026adfa17b36fedSimon Hosie            //vext.u16    q13, q8, q9, #4
339446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d15, d1[0]
340446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d16, d1[0]
341446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d17, d1[0]
342446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d18, d1[0]
343446788007efe0a673d0366284026adfa17b36fedSimon Hosie    103:    vext.u16    q12, q7, q8, #5
344446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #3
345446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[3]
346446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[3]
347446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[3]
348446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[3]
349446788007efe0a673d0366284026adfa17b36fedSimon Hosie    102:    vext.u16    q12, q7, q8, #6
350446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #2
351446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[2]
352446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[2]
353446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[2]
354446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[2]
355446788007efe0a673d0366284026adfa17b36fedSimon Hosie    101:    vext.u16    q12, q7, q8, #7
356446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #1
357446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[1]
358446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[1]
359446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[1]
360446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[1]
361446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Narrow the 32-bit sums to 16 bits (rounding, saturating, shift by 16) into
 * d28/d29, then narrow q14 again by FRACTION_BITS to the eight result bytes
 * in d31.
 */
362446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d28, q14, #16
363446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d29, q15, #16
364446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u16 d31, q14, #FRACTION_BITS
365446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Rotate the window by one q register (eight samples), pulling in q11. */
366446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q6, q7
367446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q7, q8
368446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q8, q9
369446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q9, q10
370446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
371446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/
372446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* hconv1_25: one step of a symmetric horizontal filter over 16-bit samples,
 * producing eight results per expansion.  By its name this is the one-channel
 * variant with up to 25 taps on each side of the centre (confirm against the
 * code that expands it).
 *
 * Inputs:
 *   d0[0]..d6[1]  u16 coefficients.  d0[0] weights the centre sample and
 *                 coefficient k (1..25) weights both the sample k lanes to
 *                 the left and the sample k lanes to the right of it.
 *   d7, q4-q10    sliding window of u16 samples; the eight centre samples
 *                 start at lane 7 of q6.
 *   q11           read by tap 25 only through a vext with offset 0, which
 *                 yields q10 alone; otherwise just copied into q10 when the
 *                 window slides (presumably refilled by the caller between
 *                 expansions -- confirm).
 *   r5            number of taps to apply, 1..25.
 * Outputs:
 *   d31           the eight results narrowed to 8 bits.
 *   d7, q4-q10    window slid down by one q register (eight samples).
 * Clobbers: r12, q12, q13, q14, q15.
 */
373446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv1_25/*{{{*/
            /* Start the 32-bit accumulators with centre * d0[0]:
             * q14 takes the low four lanes, q15 the high four. */
374446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q12, q6, q7, #7
375446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d24, d0[0]
376446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d25, d0[0]
377446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Computed jump into the tap ladder.  In ARM state pc reads as
             * the current instruction + 8, so the ldr indexes words starting
             * at the bkpt (index 0, so r5 = 0 would fetch the bkpt word
             * rather than a table entry) and the add is relative to label
             * 100.  r5 = k therefore lands on label 100+k, and execution
             * falls through to 101, accumulating taps k down to 1. */
378446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [pc, r5, LSL #2]
379446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         pc, pc, r12
380446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bkpt
381446788007efe0a673d0366284026adfa17b36fedSimon Hosie    100:    .word 101f-100b
382446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 102f-100b
383446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 103f-100b
384446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 104f-100b
385446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 105f-100b
386446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 106f-100b
387446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 107f-100b
388446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 108f-100b
389446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 109f-100b
390446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 110f-100b
391446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 111f-100b
392446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 112f-100b
393446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 113f-100b
394446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 114f-100b
395446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 115f-100b
396446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 116f-100b
397446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 117f-100b
398446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 118f-100b
399446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 119f-100b
400446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 120f-100b
401446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 121f-100b
402446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 122f-100b
403446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 123f-100b
404446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 124f-100b
405446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 125f-100b
            /* Tap ladder, highest tap first.  For tap k, q12 is the window
             * k lanes left of the centre and q13 the window k lanes right of
             * it; both are multiplied by coefficient k and added into q14
             * (low four lanes) and q15 (high four lanes).  Taps 24 and 25
             * read lanes 6-7 of q3 only, i.e. d7; d6 holds coefficients. */
406446788007efe0a673d0366284026adfa17b36fedSimon Hosie    125:    vext.u16    q12, q3, q4, #6
407446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q10, q11, #0
408446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d6[1]
409446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d6[1]
410446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d6[1]
411446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d6[1]
412446788007efe0a673d0366284026adfa17b36fedSimon Hosie    124:    vext.u16    q12, q3, q4, #7
413446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #7
414446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d6[0]
415446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d6[0]
416446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d6[0]
417446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d6[0]
418446788007efe0a673d0366284026adfa17b36fedSimon Hosie    123:    vext.u16    q12, q4, q5, #0
419446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #6
420446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[3]
421446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[3]
422446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d5[3]
423446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d5[3]
424446788007efe0a673d0366284026adfa17b36fedSimon Hosie    122:    vext.u16    q12, q4, q5, #1
425446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #5
426446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[2]
427446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[2]
428446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d5[2]
429446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d5[2]
430446788007efe0a673d0366284026adfa17b36fedSimon Hosie    121:    vext.u16    q12, q4, q5, #2
431446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #4
432446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[1]
433446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[1]
434446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d5[1]
435446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d5[1]
436446788007efe0a673d0366284026adfa17b36fedSimon Hosie    120:    vext.u16    q12, q4, q5, #3
437446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #3
438446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[0]
439446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[0]
440446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d5[0]
441446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d5[0]
442446788007efe0a673d0366284026adfa17b36fedSimon Hosie    119:    vext.u16    q12, q4, q5, #4
443446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #2
444446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[3]
445446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[3]
446446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d4[3]
447446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d4[3]
448446788007efe0a673d0366284026adfa17b36fedSimon Hosie    118:    vext.u16    q12, q4, q5, #5
449446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #1
450446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[2]
451446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[2]
452446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d4[2]
453446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d4[2]
454446788007efe0a673d0366284026adfa17b36fedSimon Hosie    117:    vext.u16    q12, q4, q5, #6
455446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q9, q10, #0
456446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[1]
457446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[1]
458446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d4[1]
459446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d4[1]
460446788007efe0a673d0366284026adfa17b36fedSimon Hosie    116:    vext.u16    q12, q4, q5, #7
461446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #7
462446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[0]
463446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[0]
464446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d4[0]
465446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d4[0]
466446788007efe0a673d0366284026adfa17b36fedSimon Hosie    115:    vext.u16    q12, q5, q6, #0
467446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #6
468446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[3]
469446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[3]
470446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[3]
471446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d3[3]
472446788007efe0a673d0366284026adfa17b36fedSimon Hosie    114:    vext.u16    q12, q5, q6, #1
473446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #5
474446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[2]
475446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[2]
476446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[2]
477446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d3[2]
478446788007efe0a673d0366284026adfa17b36fedSimon Hosie    113:    vext.u16    q12, q5, q6, #2
479446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #4
480446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[1]
481446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[1]
482446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[1]
483446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d3[1]
484446788007efe0a673d0366284026adfa17b36fedSimon Hosie    112:    vext.u16    q12, q5, q6, #3
485446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #3
486446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[0]
487446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[0]
488446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[0]
489446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d3[0]
490446788007efe0a673d0366284026adfa17b36fedSimon Hosie    111:    vext.u16    q12, q5, q6, #4
491446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #2
492446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[3]
493446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[3]
494446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[3]
495446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[3]
496446788007efe0a673d0366284026adfa17b36fedSimon Hosie    110:    vext.u16    q12, q5, q6, #5
497446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #1
498446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[2]
499446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[2]
500446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[2]
501446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[2]
502446788007efe0a673d0366284026adfa17b36fedSimon Hosie    109:    vext.u16    q12, q5, q6, #6
503446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q8, q9, #0
504446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[1]
505446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[1]
506446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[1]
507446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[1]
508446788007efe0a673d0366284026adfa17b36fedSimon Hosie    108:    vext.u16    q12, q5, q6, #7
509446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q7, q8, #7
510446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[0]
511446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[0]
512446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[0]
513446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[0]
514446788007efe0a673d0366284026adfa17b36fedSimon Hosie    107:    vext.u16    q12, q6, q7, #0
515446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q7, q8, #6
516446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[3]
517446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[3]
518446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[3]
519446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[3]
520446788007efe0a673d0366284026adfa17b36fedSimon Hosie    106:    vext.u16    q12, q6, q7, #1
521446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q7, q8, #5
522446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[2]
523446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[2]
524446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[2]
525446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[2]
526446788007efe0a673d0366284026adfa17b36fedSimon Hosie    105:    vext.u16    q12, q6, q7, #2
527446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q7, q8, #4
528446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[1]
529446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[1]
530446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[1]
531446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[1]
532446788007efe0a673d0366284026adfa17b36fedSimon Hosie    104:    vext.u16    q12, q6, q7, #3
533446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q7, q8, #3
534446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[0]
535446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[0]
536446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[0]
537446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[0]
538446788007efe0a673d0366284026adfa17b36fedSimon Hosie    103:    vext.u16    q12, q6, q7, #4
539446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q7, q8, #2
540446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[3]
541446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[3]
542446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[3]
543446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[3]
544446788007efe0a673d0366284026adfa17b36fedSimon Hosie    102:    vext.u16    q12, q6, q7, #5
545446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q7, q8, #1
546446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[2]
547446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[2]
548446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[2]
549446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[2]
550446788007efe0a673d0366284026adfa17b36fedSimon Hosie    101:    vext.u16    q12, q6, q7, #6
551446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q13, q7, q8, #0
552446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[1]
553446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[1]
554446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[1]
555446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[1]
556446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Narrow the eight 32-bit sums to u16 in q14 with a rounding,
             * saturating shift right by 16, then narrow q14 to bytes in d31
             * with a rounding, saturating shift right by FRACTION_BITS. */
557446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d28, q14, #16
558446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d29, q15, #16
559446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u16 d31, q14, #FRACTION_BITS
560446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Slide the window down by one q register (eight samples).  Only
             * d7 of q3 carries sample data, so it is refreshed from d9, the
             * upper half of the old q4, instead of moving the whole of q3. */
561446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        d7, d9
562446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q4, q5
563446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q5, q6
564446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q6, q7
565446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q7, q8
566446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q8, q9
567446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q9, q10
568446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
569446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/
570446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Tap counts that have a dedicated hconv4_N macro below (hconv4_6 and
 * hconv4_12).  NOTE(review): the code that expands this list is not in this
 * part of the file -- confirm how it is consumed before changing it. */
571446788007efe0a673d0366284026adfa17b36fedSimon Hosie#define TUNED_LIST4 6, 12
/* hconv4_6: one step of a symmetric horizontal filter in which neighbouring
 * taps are four u16 lanes (one d register) apart, producing eight results
 * per expansion.  By its name this is the four-channel variant with up to 6
 * taps on each side of the centre (confirm against the code that expands it).
 *
 * Inputs:
 *   d0[0]..d1[2]  u16 coefficients.  d0[0] weights the centre and
 *                 coefficient k (1..6) weights both the d registers k below
 *                 and the d registers k above it.
 *   q4-q10        sliding window of u16 samples (d8..d21); the centre is q7
 *                 (d14, d15).
 *   q11           not read by the taps; only copied into q10 when the window
 *                 slides (presumably refilled by the caller -- confirm).
 *   r5            number of taps to apply, 1..6.
 * Outputs:
 *   d31           the eight results narrowed to 8 bits.
 *   q4-q10        window slid down by one q register.
 * Clobbers: r12, q14, q15.
 */
572446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_6/*{{{*/
            /* Start the 32-bit accumulators with centre * d0[0]:
             * q14 from d14, q15 from d15. */
573446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d14, d0[0]
574446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d15, d0[0]
575446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Computed jump into the tap ladder.  In ARM state pc reads as
             * the current instruction + 8, so the ldr indexes words starting
             * at the bkpt (index 0, so r5 = 0 would fetch the bkpt word
             * rather than a table entry) and the add is relative to label
             * 100.  r5 = k lands on label 100+k and execution falls through
             * to 101, accumulating taps k down to 1. */
576446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [pc, r5, LSL #2]
577446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         pc, pc, r12
578446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bkpt
579446788007efe0a673d0366284026adfa17b36fedSimon Hosie    100:    .word 101f-100b
580446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 102f-100b
581446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 103f-100b
582446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 104f-100b
583446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 105f-100b
584446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 106f-100b
            /* Tap ladder, highest tap first.  Tap k adds coefficient k times
             * d(14-k) and d(14+k) into q14, and times d(15-k) and d(15+k)
             * into q15.  No vext is needed because every offset is a whole
             * d register. */
585446788007efe0a673d0366284026adfa17b36fedSimon Hosie    106:    vmlal.u16   q14, d8,  d1[2]
586446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d9,  d1[2]
587446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d20, d1[2]
588446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d21, d1[2]
589446788007efe0a673d0366284026adfa17b36fedSimon Hosie    105:    vmlal.u16   q14, d9,  d1[1]
590446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d10, d1[1]
591446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d19, d1[1]
592446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d20, d1[1]
593446788007efe0a673d0366284026adfa17b36fedSimon Hosie    104:    vmlal.u16   q14, d10, d1[0]
594446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d11, d1[0]
595446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d18, d1[0]
596446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d19, d1[0]
597446788007efe0a673d0366284026adfa17b36fedSimon Hosie    103:    vmlal.u16   q14, d11, d0[3]
598446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d12, d0[3]
599446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d17, d0[3]
600446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d18, d0[3]
601446788007efe0a673d0366284026adfa17b36fedSimon Hosie    102:    vmlal.u16   q14, d12, d0[2]
602446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d13, d0[2]
603446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d16, d0[2]
604446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d17, d0[2]
605446788007efe0a673d0366284026adfa17b36fedSimon Hosie    101:    vmlal.u16   q14, d13, d0[1]
606446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d14, d0[1]
607446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d15, d0[1]
608446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d16, d0[1]
609446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Narrow the eight 32-bit sums to u16 in q14 with a rounding,
             * saturating shift right by 16, then narrow q14 to bytes in d31
             * with a rounding, saturating shift right by FRACTION_BITS. */
610446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d28, q14, #16
611446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d29, q15, #16
612446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u16 d31, q14, #FRACTION_BITS
613446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Slide the window down by one q register (two d registers). */
614446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q4, q5
615446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q5, q6
616446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q6, q7
617446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q7, q8
618446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q8, q9
619446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q9, q10
620446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
621446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/
622446788007efe0a673d0366284026adfa17b36fedSimon Hosie
623446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_12/*{{{*/
624446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d8, d0[0]
625446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d9, d0[0]
626446788007efe0a673d0366284026adfa17b36fedSimon Hosie
627446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [pc, r5, LSL #2]
628446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         pc, pc, r12
629446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bkpt
630446788007efe0a673d0366284026adfa17b36fedSimon Hosie    100:    .word 101f-100b
631446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 102f-100b
632446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 103f-100b
633446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 104f-100b
634446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 105f-100b
635446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 106f-100b
636446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 107f-100b
637446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 108f-100b
638446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 109f-100b
639446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 110f-100b
640446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 111f-100b
641446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 112f-100b
642446788007efe0a673d0366284026adfa17b36fedSimon Hosie    112:    add         r12, r9, #0x1a0
643446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
644446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
645446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[0]
646446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[0]
647446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d20, d3[0]
648446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d21, d3[0]
649446788007efe0a673d0366284026adfa17b36fedSimon Hosie    111:    add         r12, r9, #0x1a8
650446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
651446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
652446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
653446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
654446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[3]
655446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[3]
656446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d19, d2[3]
657446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d20, d2[3]
658446788007efe0a673d0366284026adfa17b36fedSimon Hosie    110:    add         r12, r9, #0x1b0
659446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
660446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
661446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[2]
662446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[2]
663446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d18, d2[2]
664446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d19, d2[2]
665446788007efe0a673d0366284026adfa17b36fedSimon Hosie    109:    add         r12, r9, #0x1b8
666446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
667446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
668446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
669446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
670446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[1]
671446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[1]
672446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d17, d2[1]
673446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d18, d2[1]
674446788007efe0a673d0366284026adfa17b36fedSimon Hosie    108:    add         r12, r9, #0x1c0
675446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
676446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
677446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[0]
678446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[0]
679446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d16, d2[0]
680446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d17, d2[0]
681446788007efe0a673d0366284026adfa17b36fedSimon Hosie    107:    add         r12, r9, #0x1c8
682446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
683446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
684446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
685446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
686446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[3]
687446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[3]
688446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d15, d1[3]
689446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d16, d1[3]
690446788007efe0a673d0366284026adfa17b36fedSimon Hosie    106:    add         r12, r9, #0x1d0
691446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
692446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
693446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[2]
694446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[2]
695446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d14, d1[2]
696446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d15, d1[2]
697446788007efe0a673d0366284026adfa17b36fedSimon Hosie    105:    add         r12, r9, #0x1d8
698446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
699446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
700446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
701446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
702446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[1]
703446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[1]
704446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d13, d1[1]
705446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d14, d1[1]
706446788007efe0a673d0366284026adfa17b36fedSimon Hosie    104:    add         r12, r9, #0x1e0
707446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
708446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
709446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[0]
710446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[0]
711446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d12, d1[0]
712446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d13, d1[0]
713446788007efe0a673d0366284026adfa17b36fedSimon Hosie    103:    add         r12, r9, #0x1e8
714446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
715446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
716446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
717446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
718446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[3]
719446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[3]
720446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d11, d0[3]
721446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d12, d0[3]
722446788007efe0a673d0366284026adfa17b36fedSimon Hosie    102:    add         r12, r9, #0x1f0
723446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
724446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
725446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[2]
726446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[2]
727446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d10, d0[2]
728446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d11, d0[2]
729446788007efe0a673d0366284026adfa17b36fedSimon Hosie    101:    add         r12, r9, #0x1f8
730446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
731446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]
732446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[1]
733446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d8,  d0[1]
734446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d9,  d0[1]
735446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d10, d0[1]
736446788007efe0a673d0366284026adfa17b36fedSimon Hosie
737446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d28, q14, #16
738446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d29, q15, #16
739446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u16 d31, q14, #FRACTION_BITS
740446788007efe0a673d0366284026adfa17b36fedSimon Hosie
741446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {q4}, [r9:128]!
742446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r9, r9, #0x200
743446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q4, q5
744446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q5, q6
745446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q6, q7
746446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q7, q8
747446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q8, q9
748446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q9, q10
749446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
750446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/
751446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* hconv4_25: one step of the horizontal convolution with up to 25 taps on
 * either side of the centre samples.
 *
 * Inputs, as read by the code below:
 *   r5      - number of tap pairs to apply.  It indexes the 25-entry jump
 *             table directly, so it must be in the range 1..25.
 *   r9      - current position in the window buffer.  Every address derived
 *             from it has bit 0x200 cleared, so offsets wrap around
 *             (presumably the buffer is 0x200 bytes at an address with that
 *             bit clear -- TODO confirm against the code that sets r9 up).
 *   d0-d6   - u16 coefficients.  d0[0] weights the centre samples and tap k
 *             uses overall lane k, up to d6[1] for tap 25.
 *   q4-q10  - samples that logically follow buffer offset 0x1f8: d8 stands in
 *             for offset 0x200, d9 for 0x208, and so on up to d21.
 *   q11     - not read by the taps; only copied into q10 at the end.
 *
 * Outputs:
 *   d31     - eight u8 results (rounded and saturated).
 *   r9      - advanced by 16 bytes, with wrap.
 *   q4-q10  - replaced by the old q5-q11.
 *
 * Clobbers r12, q12, q13, q14 and q15.
 */
752446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro hconv4_25/*{{{*/
            /* Centre term: 16 bytes at offset 0x198, loaded as two 8-byte
             * halves with the wrap mask re-applied after the post-increment,
             * then multiplied by d0[0] to start the accumulators q14 and q15.
             */
753446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r12, r9, #0x198
754446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
755446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
756446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
757446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12:64]
758446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q14, d24, d0[0]
759446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmull.u16   q15, d25, d0[0]
760446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Computed jump.  In ARM state pc reads as the current
             * instruction plus 8, so the ldr indexes from the bkpt word
             * (r5 == 1 selects the first table entry) and the add sees
             * pc == 100b, which is what the table offsets are relative to.
             * The bkpt only pads the table into that position.
             */
761446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [pc, r5, LSL #2]
762446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         pc, pc, r12
763446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bkpt
764446788007efe0a673d0366284026adfa17b36fedSimon Hosie    100:    .word 101f-100b
765446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 102f-100b
766446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 103f-100b
767446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 104f-100b
768446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 105f-100b
769446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 106f-100b
770446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 107f-100b
771446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 108f-100b
772446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 109f-100b
773446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 110f-100b
774446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 111f-100b
775446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 112f-100b
776446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 113f-100b
777446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 114f-100b
778446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 115f-100b
779446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 116f-100b
780446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 117f-100b
781446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 118f-100b
782446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 119f-100b
783446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 120f-100b
784446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 121f-100b
785446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 122f-100b
786446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 123f-100b
787446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 124f-100b
788446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word 125f-100b
            /* Taps 25 down to 13.  Execution enters at the label selected
             * above and falls through every lower-numbered tap to 101.  For
             * tap k the left samples are loaded from offset 0x198 - 8*k and
             * the right samples are taken from d8-d21.  Offsets that are a
             * multiple of 16 use one 128-bit load; the others load each
             * 8-byte half separately and re-apply the wrap mask in between.
             */
789446788007efe0a673d0366284026adfa17b36fedSimon Hosie    125:    add         r12, r9, #0x0d0
790446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
791446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
792446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d6[1]
793446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d6[1]
794446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d20, d6[1]
795446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d21, d6[1]
796446788007efe0a673d0366284026adfa17b36fedSimon Hosie    124:    add         r12, r9, #0x0d8
797446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
798446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
799446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
800446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
801446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d6[0]
802446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d6[0]
803446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d19, d6[0]
804446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d20, d6[0]
805446788007efe0a673d0366284026adfa17b36fedSimon Hosie    123:    add         r12, r9, #0x0e0
806446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
807446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
808446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[3]
809446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[3]
810446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d18, d5[3]
811446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d19, d5[3]
812446788007efe0a673d0366284026adfa17b36fedSimon Hosie    122:    add         r12, r9, #0x0e8
813446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
814446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
815446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
816446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
817446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[2]
818446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[2]
819446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d17, d5[2]
820446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d18, d5[2]
821446788007efe0a673d0366284026adfa17b36fedSimon Hosie    121:    add         r12, r9, #0x0f0
822446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
823446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
824446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[1]
825446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[1]
826446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d16, d5[1]
827446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d17, d5[1]
828446788007efe0a673d0366284026adfa17b36fedSimon Hosie    120:    add         r12, r9, #0x0f8
829446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
830446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
831446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
832446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
833446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d5[0]
834446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d5[0]
835446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d15, d5[0]
836446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d16, d5[0]
837446788007efe0a673d0366284026adfa17b36fedSimon Hosie    119:    add         r12, r9, #0x100
838446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
839446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
840446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[3]
841446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[3]
842446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d14, d4[3]
843446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d15, d4[3]
844446788007efe0a673d0366284026adfa17b36fedSimon Hosie    118:    add         r12, r9, #0x108
845446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
846446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
847446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
848446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
849446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[2]
850446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[2]
851446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d13, d4[2]
852446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d14, d4[2]
853446788007efe0a673d0366284026adfa17b36fedSimon Hosie    117:    add         r12, r9, #0x110
854446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
855446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
856446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[1]
857446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[1]
858446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d12, d4[1]
859446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d13, d4[1]
860446788007efe0a673d0366284026adfa17b36fedSimon Hosie    116:    add         r12, r9, #0x118
861446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
862446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
863446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
864446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
865446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d4[0]
866446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d4[0]
867446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d11, d4[0]
868446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d12, d4[0]
869446788007efe0a673d0366284026adfa17b36fedSimon Hosie    115:    add         r12, r9, #0x120
870446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
871446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
872446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[3]
873446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[3]
874446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d10, d3[3]
875446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d11, d3[3]
876446788007efe0a673d0366284026adfa17b36fedSimon Hosie    114:    add         r12, r9, #0x128
877446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
878446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
879446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
880446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
881446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[2]
882446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[2]
883446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d9,  d3[2]
884446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d10, d3[2]
885446788007efe0a673d0366284026adfa17b36fedSimon Hosie    113:    add         r12, r9, #0x130
886446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
887446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
888446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[1]
889446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[1]
890446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d8,  d3[1]
891446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d9,  d3[1]
            /* Tap 12: the right samples straddle the two sources.  The half
             * for q14 is loaded from buffer offset 0x1f8 into d26, and the
             * half for q15 is d8 from the register window.
             */
892446788007efe0a673d0366284026adfa17b36fedSimon Hosie    112:    add         r12, r9, #0x138
893446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
894446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
895446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
896446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
897446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1f8
898446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
899446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]
900446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d3[0]
901446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d3[0]
902446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d3[0]   @ Could be d7, without the load, right?
903446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d8,  d3[0]
            /* Taps 11 down to 1: both sides come from the buffer.  The left
             * samples go into q12 as before; the right samples (the indented
             * instructions) are loaded into q13 from offset 0x198 + 8*k.
             */
904446788007efe0a673d0366284026adfa17b36fedSimon Hosie    111:    add         r12, r9, #0x140
905446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
906446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
907446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1f0
908446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
909446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
910446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[3]
911446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[3]
912446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[3]
913446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[3]
914446788007efe0a673d0366284026adfa17b36fedSimon Hosie    110:    add         r12, r9, #0x148
915446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
916446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
917446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
918446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
919446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1e8
920446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
921446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
922446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
923446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
924446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[2]
925446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[2]
926446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[2]
927446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[2]
928446788007efe0a673d0366284026adfa17b36fedSimon Hosie    109:    add         r12, r9, #0x150
929446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
930446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
931446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1e0
932446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
933446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
934446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[1]
935446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[1]
936446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[1]
937446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[1]
938446788007efe0a673d0366284026adfa17b36fedSimon Hosie    108:    add         r12, r9, #0x158
939446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
940446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
941446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
942446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
943446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1d8
944446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
945446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
946446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
947446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
948446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d2[0]
949446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d2[0]
950446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d2[0]
951446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d2[0]
952446788007efe0a673d0366284026adfa17b36fedSimon Hosie    107:    add         r12, r9, #0x160
953446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
954446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
955446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1d0
956446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
957446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
958446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[3]
959446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[3]
960446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[3]
961446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[3]
962446788007efe0a673d0366284026adfa17b36fedSimon Hosie    106:    add         r12, r9, #0x168
963446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
964446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
965446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
966446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
967446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1c8
968446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
969446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
970446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
971446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
972446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[2]
973446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[2]
974446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[2]
975446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[2]
976446788007efe0a673d0366284026adfa17b36fedSimon Hosie    105:    add         r12, r9, #0x170
977446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
978446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
979446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1c0
980446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
981446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
982446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[1]
983446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[1]
984446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[1]
985446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[1]
986446788007efe0a673d0366284026adfa17b36fedSimon Hosie    104:    add         r12, r9, #0x178
987446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
988446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
989446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
990446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
991446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1b8
992446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
993446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
994446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
995446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
996446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d1[0]
997446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d1[0]
998446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d1[0]
999446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d1[0]
1000446788007efe0a673d0366284026adfa17b36fedSimon Hosie    103:    add         r12, r9, #0x180
1001446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1002446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]
1003446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1b0
1004446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
1005446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26,d27}, [r12:128]
1006446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[3]
1007446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[3]
1008446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[3]
1009446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[3]
1010446788007efe0a673d0366284026adfa17b36fedSimon Hosie    102:    add         r12, r9, #0x188
1011446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1012446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24}, [r12:64]!
1013446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1014446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d25}, [r12]
1015446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            add         r12, r9, #0x1a8
1016446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
1017446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d26}, [r12:64]!
1018446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            bic         r12, r12, #0x200
1019446788007efe0a673d0366284026adfa17b36fedSimon Hosie                                            vld1.u16    {d27}, [r12:64]
1020446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[2]
1021446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[2]
1022446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[2]
1023446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[2]
            /* Tap 1 reads 32 contiguous bytes: offset 0x190 into q12 and,
             * after the post-increment and wrap, offset 0x1a0 into q13.
             */
1024446788007efe0a673d0366284026adfa17b36fedSimon Hosie    101:    add         r12, r9, #0x190
1025446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1026446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d24,d25}, [r12:128]!
1027446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r12, r12, #0x200
1028446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d26,d27}, [r12:128]
1029446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d24, d0[1]
1030446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d25, d0[1]
1031446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q14, d26, d0[1]
1032446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmlal.u16   q15, d27, d0[1]
1033446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Narrow the 32-bit sums to u16 (rounding, saturating, shifted
             * right by 16) into d28 and d29, then narrow q14 again to u8
             * (shifted right by FRACTION_BITS), leaving the result in d31.
             */
1034446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d28, q14, #16
1035446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u32 d29, q15, #16
1036446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vqrshrn.u16 d31, q14, #FRACTION_BITS
1037446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Write q4 into the buffer at r9, step r9 on by 16 bytes with
             * wrap, and slide q5-q11 down into q4-q10.
             */
1038446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {q4}, [r9:128]!
1039446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r9, r9, #0x200
1040446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q4, q5
1041446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q5, q6
1042446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q6, q7
1043446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q7, q8
1044446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q8, q9
1045446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q9, q10
1046446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
1047446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm/*}}}*/
1048446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1049446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Dedicated function wrapper for the fetch macro, for the cases where
1050446788007efe0a673d0366284026adfa17b36fedSimon Hosie * performance isn't that important, to keep code size down.
 *
 * The fetch macro is expanded with no arguments between a push and a pop
 * of r10 and r11, so both registers hold their entry values again on
 * return.  prefetch keeps its window fill start/stop indices in r10/r11
 * across calls to this function.  Returns with bx lr.
1051446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1052446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(fetch_generic_asm)
1053446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r10,r11}
1054446788007efe0a673d0366284026adfa17b36fedSimon Hosie            fetch
1055446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r10,r11}
1056446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          lr
1057446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(fetch_generic_asm)
1058446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1059446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Given values in q10 and q11, and an index in r11, sweep the (r11&15)th value
1060446788007efe0a673d0366284026adfa17b36fedSimon Hosie * across to fill the rest of the register pair.  Used for filling the right
1061446788007efe0a673d0366284026adfa17b36fedSimon Hosie * hand edge of the window when starting too close to the right hand edge of
1062446788007efe0a673d0366284026adfa17b36fedSimon Hosie * the image.
 *
 * The routine is driven by the low four bits of -r11:
 *   - bit 3: q10 is copied into q11 before the fill, and afterwards the
 *     register pair becomes (filled q11, q12);
 *   - bits 2..0: rotate a copy of q11 to choose the lane that is broadcast
 *     into q12, and shift the byte mask that decides which lanes of q11
 *     are replaced by that broadcast value.
 * r1 is moved back by 16, 8, 4 and 2 for bits 3, 2, 1 and 0 respectively.
 *
 * On return r11 holds its entry value again (it is negated twice), q12
 * holds the broadcast value in every lane, and q13 and the flags are
 * clobbered.
1063446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1064446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(prefetch_clamp1)
1065446788007efe0a673d0366284026adfa17b36fedSimon Hosie            rsb         r11, r11, #0
1066446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r11, #8
1067446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1068446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    q11, q10
1069446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, #16
            /* q12 is the working copy that gets rotated; d26 starts as
             * eight 0xff bytes, one per u16 lane of q11.
             */
1070446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          vmov.u16    q12, q11
1071446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.i8     d26, #0xff
1072446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r11, #4
1073446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
            /* Rotate q12 by four lanes and clear the four low mask bytes. */
1074446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q12, q12, q12, #4
1075446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, #8
1076446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vshl.u64    d26, d26, #32
1077446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r11, #2
1078446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
            /* vext #6 makes lane j take the old lane (j+6)&7; the mask
             * loses two more low bytes.
             */
1079446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q12, q12, q12, #6
1080446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, #4
1081446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vshl.u64    d26, d26, #16
1082446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r11, #1
1083446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
            /* vext #7 makes lane j take the old lane (j+7)&7; the mask
             * loses one more low byte.
             */
1084446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q12, q12, q12, #7
1085446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, #2
1086446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vshl.u64    d26, d26, #8
            /* Broadcast lane 6 of the rotated copy (d25[2]) into q12, widen
             * the byte mask to one 0x0000/0xffff value per u16 lane, then
             * vbif writes q12 into every lane of q11 whose mask is zero.
             * NOTE(review): the zeroed mask bytes are the low ones, so the
             * low lanes of q11 receive the pad value, and the broadcast
             * source is lane 6 rather than lane 7 -- confirm both against
             * the fill direction described in the header.
             */
1087446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          vdup.u16    q12, d25[2]
1088446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmovl.s8    q13, d26
1089446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vbif        q11, q12, q13
            /* No branch targets the label on the next line; it is only
             * reached by falling through.
             */
1090446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r11, #8
1091446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1092446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
1093446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q11, q12
            /* Undo the negation done on entry. */
1094446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          rsb         r11, r11, #0
1095446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          lr
1096446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(prefetch_clamp1)
1097446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Variant of prefetch_clamp1 selected by prefetch_one when step=4.  It
 * works on whole d registers (four u16 lanes at a time) and is driven by
 * bits 3 and 2 of -r11:
 *   - bit 3: q10 is copied into q11 first, and afterwards the register
 *     pair becomes (q11, q12);
 *   - bit 2: d22 is duplicated into d23 and used as the pad value;
 *     otherwise d23 is the pad value.
 * r1 is moved back by 16 for bit 3 and by 8 for bit 2.
 *
 * On return r11 holds its entry value again, both halves of q12 hold the
 * pad value, and the flags are clobbered.
 */
1098446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(prefetch_clamp4)
1099446788007efe0a673d0366284026adfa17b36fedSimon Hosie            rsb         r11, r11, #0
1100446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r11, #8
1101446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1102446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, #16
1103446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    q11, q10
            /* Default pad value: the high half of q11. */
1104446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          vmov        d24, d23
1105446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r11, #4
1106446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
            /* Pad from the low half instead, and overwrite the high half
             * of q11 with it.
             */
1107446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        d24, d22
1108446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, #8
1109446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        d23, d22
            /* Replicate the pad value into the other half of q12. */
1110446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          vmov        d25, d24
1111446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r11, #8
1112446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1113446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
1114446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q11, q12
            /* Undo the negation done on entry. */
1115446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          rsb         r11, r11, #0
1116446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          lr
1117446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(prefetch_clamp4)
1118446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1119446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1120446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Helpers for prefetch, below.
1121446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
/* prefetch_out: emit one 16-lane slot of window data.
 *
 *      qa, qb  -- destination registers (used when store <= 0)
 *      store   -- > 0: store the source registers at [r9] with writeback
 *                 = 0: copy qsa to qa and qsb to qb
 *                 < 0: copy only qsb_hi to qb
 *      qsa,qsb -- source q registers
 *      qsb_hi  -- name of the d register used as the source when store < 0
 *
 * When store > 0 and qsa and qsb name the same register, a register list
 * cannot hold it twice, so two separate 128-bit stores are emitted;
 * otherwise the pair goes out in a single 256-bit-aligned store.
 */
1122446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch_out qa, qb, store, qsa, qsb, qsb_hi
1123446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \store > 0
1124446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .ifc \qsa,\qsb
1125446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u16    {\qsa}, [r9:128]!
1126446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u16    {\qsb}, [r9:128]!
1127446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .else
1128446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u16    {\qsa,\qsb}, [r9:256]!
1129446788007efe0a673d0366284026adfa17b36fedSimon Hosie    .endif
1130446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .elseif \store == 0
1131446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    \qa, \qsa
1132446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    \qb, \qsb
1133446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1134446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    \qb, \qsb_hi
1135446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1136446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
1137446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* prefetch_one: emit the fill code for one 16-lane slot of the window.
 *
 *      qa, qb  -- destination registers, forwarded to prefetch_out
 *      rem     -- selects the slot: i = (need - 16) - rem, where need is the
 *                 symbol set by prefetch.  Nothing is emitted when i < 0.
 *      c       -- not referenced in the macro body
 *      store   -- forwarded to prefetch_out
 *      step    -- selects prefetch_clamp1 or prefetch_clamp4
 *
 * Each expansion has four entry labels, and every forward reference (1f,
 * 2f, 3f, 4f) resolves to the matching label of the NEXT expansion that
 * emitted code:
 *   1: r10 >= i+16 (unsigned): emit q9, then continue at the next 1:.
 *   2: r11 >  i+16 (unsigned): emit q10/q11, call fetch_generic_asm, then
 *      continue at the next 2:.
 *   3: otherwise: call prefetch_clamp, emit q10/q11 and fall into 4:.
 *   4: branch to 4f+4, i.e. one instruction past the next 4: label, which
 *      is that expansion's code to emit q12.
 * The `+4` relies on the instruction at each 4: label being a single
 * 4-byte ARM instruction.  The expansion with rem == 0 defines all four
 * labels on a nop, so the chain ends there.
 */
1138446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch_one  qa, qb, rem, c, store=0, step=1
1139446788007efe0a673d0366284026adfa17b36fedSimon Hosie.set i, (need - 16) - \rem
1140446788007efe0a673d0366284026adfa17b36fedSimon Hosie.if i >= 0
1141446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          cmp         r10, #i+16
1142446788007efe0a673d0366284026adfa17b36fedSimon Hosie            blo         2f
1143446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_out \qa, \qb, \store, q9, q9, d19
1144446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           1f
1145446788007efe0a673d0366284026adfa17b36fedSimon Hosie2:          cmp         r11, #i+16
1146446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bls         3f
1147446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_out \qa, \qb, \store, q10, q11, d23
1148446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bl          fetch_generic_asm
1149446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           2f
1150446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:          bl          prefetch_clamp\step
1151446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_out \qa, \qb, \store, q10, q11, d23
            /* Skip the branch at the next 4: label and land on the code
             * there that emits q12.
             */
1152446788007efe0a673d0366284026adfa17b36fedSimon Hosie4:          b           4f+4
1153446788007efe0a673d0366284026adfa17b36fedSimon Hosie            @q12 contains pad word from prefetch_clam call
            /* Entered only via `4f+4` from an earlier expansion; q12 is the
             * value left there by the prefetch_clamp call.
             */
1154446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_out \qa, \qb, \store, q12, q12, d25
1155446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \rem > 0
1156446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           4f+4
1157446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
            /* Last slot: terminate every label chain here.  A `4f+4`
             * branch lands on the instruction after the nop.
             */
1158446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:
1159446788007efe0a673d0366284026adfa17b36fedSimon Hosie2:
1160446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:
1161446788007efe0a673d0366284026adfa17b36fedSimon Hosie4:          nop
1162446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1163446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endif
1164446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
1165446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1166446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* Fill the convolution window with context data.  The aim here is to load
1167446788007efe0a673d0366284026adfa17b36fedSimon Hosie * exactly rlf + rrt columns, and in the main loop to read as many columns as
1168446788007efe0a673d0366284026adfa17b36fedSimon Hosie * will be written.  This is complicated by the need to handle cases when the
1169446788007efe0a673d0366284026adfa17b36fedSimon Hosie * input starts very close to the left or right (or both) edges of the image,
1170446788007efe0a673d0366284026adfa17b36fedSimon Hosie * and where these do not fall on 16-byte boundaries.
1171446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
1172446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input:
1173446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r1 -- src
1174446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r2 -- pitch
1175446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r3 -- count
1176446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r4 -- inlen
1177446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r5 -- r
1178446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r6 -- rup
1179446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r7 -- rdn
1180446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r8 -- rlf
1181446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r9 -- buffer (if needed)
1182446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Output:
1183446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r1 += rlf + min(count, rrt)
1184446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies:
1185446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r10 -- fill start index in the window
1186446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r11 -- fill stop index in the window
1187446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r12 -- scratch
 *
 * Macro arguments:
 *      step  -- 1 or 4; any value other than 1 scales rlf by 4 (LSL #2)
 *      max_r -- largest radius handled by this expansion
 * The window size `need` is 2 * max_r * step rounded up to a multiple of
 * 16; it is set here and read by prefetch_one.
 *
 * Left-edge alignment: the first fetch lands in q10:q11 and has to be moved
 * right by (r10 & 15) lanes, with the left-pad value from q9 shifted in, so
 * that it lines up with the 16-lane slots.  r1 is then moved back by the
 * same amount so the next fetch continues immediately after the data that
 * was kept.  Each bit of r10 therefore selects one right shift of 8, 4, 2
 * or 1 lanes; with the operands ordered (lower, upper) a right shift of k
 * lanes is a vext with offset 8 - k.
1188446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1189446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro prefetch step=1, max_r=25
1190446788007efe0a673d0366284026adfa17b36fedSimon Hosie.set need, ((\max_r + \max_r) * \step + 15) & ~15
            /* r10 = window index at which the first source value belongs. */
1191446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step == 1
1192446788007efe0a673d0366284026adfa17b36fedSimon Hosie            rsb         r10, r8, #need - (\max_r * \step)
1193446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1194446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mov         r10, r8, LSL #2
1195446788007efe0a673d0366284026adfa17b36fedSimon Hosie            rsb         r10, r10, #need - (\max_r * \step)
1196446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* r11 = min(r10 + inlen, need) (unsigned). */
1197446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r11, r10, r4
1198446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r11, #need
1199446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r11, #need
1200446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1201446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bl          fetch_generic_asm
            /* q9 = left pad, replicated from the first fetched value. */
1202446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step == 1
1203446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q9, d20[0]
1204446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1205446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    d18, d20
1206446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    d19, d20
1207446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1208446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r10, #15
1209446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         2f
            /* Move q10:q11 right by (r10 & 15) lanes, one bit of r10 at a
             * time.  The previous code tested bits 2..0 of -r10 while using
             * vext offsets 4/2/1 (right shifts of 4/6/7 lanes); whenever
             * more than one of those bits was set the data ended up 8 or
             * 16 lanes too far right.  Testing r10 itself with offsets
             * 4/6/7 shifts by exactly r10 & 15 for every value.
             */
1211446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r10, #8
1212446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1213446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    q11, q10
1214446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u16    q10, q9
1215446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r10, #4
1216446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1217446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q10, q11, #4
1218446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q9, q10, #4
1219446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step == 1
1220446788007efe0a673d0366284026adfa17b36fedSimon Hosie  1:        tst         r10, #2
1221446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1222446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q10, q11, #6
1223446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q9, q10, #6
1224446788007efe0a673d0366284026adfa17b36fedSimon Hosie  1:        tst         r10, #1
1225446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1226446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q10, q11, #7
1227446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q9, q10, #7
1228446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* r1 -= (r10 & 15); r10 is rounded down to a multiple of 16. */
1229446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          sub         r1, r1, r10
1230446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r10, r10, #15
1231446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r1, r1, r10
1232446788007efe0a673d0366284026adfa17b36fedSimon Hosie2:
1233446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step > 1
1234446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* it's only in the uchar2 and uchar4 cases where the register file
1235446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * is insufficient (given MAX_R <= 25).
1236446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
1237446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 192, c=\max_r, step=\step, store=1
1238446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 176, c=\max_r, step=\step, store=1
1239446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 160, c=\max_r, step=\step, store=1
1240446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 144, c=\max_r, step=\step, store=1
1241446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 128, c=\max_r, step=\step, store=1
1242446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx, 112, c=\max_r, step=\step, store=1
1243446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx,  96, c=\max_r, step=\step, store=1
1244446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx,  80, c=\max_r, step=\step, store=1
1245446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx,  64, c=\max_r, step=\step, store=1
1246446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, xx,  48, c=\max_r, step=\step, store=1
1247446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1248446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* q3 normally contains the coefficient table, but it's not fully
1249446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * used.  In the uchar1, r=25 case the other half of q3 is used for
1250446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * the last two window taps to avoid falling out to memory.
1251446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
1252446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one xx, d7,  48, c=\max_r, step=\step, store=-1
1253446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* The last three slots stay in registers q4..q9. */
1254446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one q4, q5,  32, c=\max_r, step=\step, store=0
1255446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one q6, q7,  16, c=\max_r, step=\step, store=0
1256446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch_one q8, q9,   0, c=\max_r, step=\step, store=0
1257446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* inlen -= rlf (scaled) + max_r * step, clamped at zero. */
1258446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step == 1
1259446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r10, r8, #\max_r * \step
1260446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1261446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mov         r10, r8, LSL #2
1262446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r10, r10, #\max_r * \step
1263446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1264446788007efe0a673d0366284026adfa17b36fedSimon Hosie            subs        r4, r4, r10
1265446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movlo       r4, #0
1266446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
1267446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1268446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* The main loop.
1269446788007efe0a673d0366284026adfa17b36fedSimon Hosie *
1270446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Input:
1271446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r0 = dst
1272446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r1 = src
1273446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r2 = pitch
1274446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r3 = count
1275446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r4 = inlen
1276446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r5 = r
1277446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r6 = rup
1278446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r7 = rdn
1279446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r9 = buffer
1280446788007efe0a673d0366284026adfa17b36fedSimon Hosie * Modifies
1281446788007efe0a673d0366284026adfa17b36fedSimon Hosie *      r8 = fetch code pointer
 *
 * Macro arguments:
 *      core    -- name of the macro expanded for every 8 bytes written
 *      step    -- 1 or 4; selects how the right-hand pad value is replicated
 *      max_r   -- forwarded to the fetch macro
 *      labelc, labelnc -- labels whose addresses seed r8 (see below)
 *
 * Structure: compute r8; loop while at least 16 input values remain (one
 * fetch, two core expansions, 16 bytes written per iteration); load and
 * align the final partial group; then produce the remaining output 8 bytes
 * at a time, finishing with 4/2/1-byte stores.
1282446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1283446788007efe0a673d0366284026adfa17b36fedSimon Hosie.macro mainloop core, step=1, max_r=25, labelc="", labelnc=""
            /* r8 = address of labelnc - r * 48.  The literal holds
             * labelnc - (1b + 8), and in ARM state pc reads as the current
             * instruction address + 8, so adding pc at 1: yields the
             * absolute address.
             */
1284446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r8, 3f
1285446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          add         r8, r8, pc
1286446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r8, r5, LSL #5
1287446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r8, r5, LSL #4
            /* Keep that pointer only when r == rup and r == rdn. */
1288446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r5, r6
1289446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmpeq       r5, r7
1290446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         5f
1291446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1292446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* if (r != rup || r != rdn) then the address-clamping table should
1293446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * be used rather than the short-cut version.
1294446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
            /* r8 = address of labelc - r * 64, built the same way from the
             * second literal.
             */
1295446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r8, 3f+4
1296446788007efe0a673d0366284026adfa17b36fedSimon Hosie2:          add         r8, r8, pc
1297446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r8, r5, LSL #6
1298446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           5f
            /* Literal pool for the two loads above, followed by the
             * 16-byte-aligned loop head.  Both are labelled 3; the loads
             * above reach the pool through 3f, the loop branch below
             * reaches the fetch through 3b.
             */
1299446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .align 3
1300446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:          .word       \labelnc-1b-8
1301446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .word       \labelc-2b-8
1302446788007efe0a673d0366284026adfa17b36fedSimon Hosie            .align 4
1303446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:          fetch max_r=\max_r, labelc=\labelc, labelnc=\labelnc, reg=r8
1304446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1305446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* For each call to fetch two are made to \core.  It would be
1306446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * preferable to have twice the work done in \core, but the
1307446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * register file is too small for this to be straightforward.
1308446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
1309446788007efe0a673d0366284026adfa17b36fedSimon Hosie            \core
1310446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {d31}, [r0]!
1311446788007efe0a673d0366284026adfa17b36fedSimon Hosie            \core
1312446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {d31}, [r0]!
1313446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1314446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r3, r3, #16
            /* Loop entry: iterate while at least 16 input values remain.
             * On exit r4 is restored to the leftover count (0..15).
             */
1315446788007efe0a673d0366284026adfa17b36fedSimon Hosie5:          subs        r4, r4, #16
1316446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bhs         3b
1317446788007efe0a673d0366284026adfa17b36fedSimon Hosie            adds        r4, r4, #16
1318446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bne         1f
            /* No input left: fill q10 and q11 with the last value held in
             * q9.
             */
1319446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step==1
1320446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q10, d19[3]
1321446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q11, d19[3]
1322446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1323446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d20, d19
1324446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d21, d19
1325446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d22, d19
1326446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d23, d19
1327446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
1328446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           4f
1329446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* 1..15 values left: move r1 by r4 - 16 before the fetch
             * (presumably so the 16-wide read ends on the last available
             * value -- the fetch macro body is not visible here).
             */
1330446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          sub         r1, r1, #16
1331446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r1, r1, r4
1332446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bl          fetch_generic_asm
1333446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* q12 = pad, replicated from the top of q11. */
1334446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step==1
1335446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q12, d23[3]
1336446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1337446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d24, d23
1338446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d25, d23
1339446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* Shift q10:q11 left by (-r4 & 15) lanes, one bit at a time
             * (8, 4, 2, 1), pulling the pad value in from q12.
             */
1340446788007efe0a673d0366284026adfa17b36fedSimon Hosie            rsb         r4, r4, #0
1341446788007efe0a673d0366284026adfa17b36fedSimon Hosie            tst         r4, #8
1342446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1343446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q10, q11
1344446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov        q11, q12
1345446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r4, #4
1346446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1347446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q10, q11, #4
1348446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q11, q12, #4
1349446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r4, #2
1350446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1351446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q10, q11, #2
1352446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q11, q12, #2
1353446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r4, #1
1354446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         4f
1355446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q10, q10, q11, #1
1356446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u16    q11, q11, q12, #1
            /* Tail: nothing more to do if no output is left. */
1357446788007efe0a673d0366284026adfa17b36fedSimon Hosie4:          cmp         r3, #0
1358446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         5f
            /* One core expansion per 8 output bytes; q11 is refilled with
             * the pad value after each one.
             */
1359446788007efe0a673d0366284026adfa17b36fedSimon Hosie3:          \core
1360446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .if \step==1
1361446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vdup.u16    q11, d23[3]
1362446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .else
1363446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vmov.u64    d22, d23
1364446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endif
            /* Fewer than 8 bytes wanted: go to the partial stores.  The
             * beq after the full store still sees the flags from subs,
             * and is taken when exactly 8 bytes were left.
             */
1365446788007efe0a673d0366284026adfa17b36fedSimon Hosie            subs        r3, r3, #8
1366446788007efe0a673d0366284026adfa17b36fedSimon Hosie            blo         4f
1367446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {d31}, [r0]!
1368446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         5f
1369446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           3b
            /* Store the final 1..7 bytes: the low three bits of r3 select
             * 4-, 2- and 1-byte stores, and d31 is rotated after each so
             * the next bytes sit in its low lanes.
             */
1370446788007efe0a673d0366284026adfa17b36fedSimon Hosie4:          tst         r3, #4
1371446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1372446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u32    {d31[0]}, [r0]!
1373446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u8     d31, d31, d31, #4
1374446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r3, #2
1375446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         1f
1376446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u16    {d31[0]}, [r0]!
1377446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u8     d31, d31, d31, #2
1378446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          tst         r3, #1
1379446788007efe0a673d0366284026adfa17b36fedSimon Hosie            beq         5f
1380446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vst1.u8     {d31[0]}, [r0]!
1381446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vext.u8     d31, d31, d31, #1
1382446788007efe0a673d0366284026adfa17b36fedSimon Hosie5:          nop
1383446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endm
1384446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Generate one convolve1_<r> function for each radius in TUNED_LIST1 (a
 * name defined outside this section) plus 25.  Each function saves r12 and
 * lr, moves src (r1) back by rlf (r8), fills the window with prefetch and
 * runs mainloop with the matching hconv1_<r> core, then returns by popping
 * into pc.  The register inputs are those listed for prefetch and
 * mainloop.
 */
1385446788007efe0a673d0366284026adfa17b36fedSimon Hosie.irep r, TUNED_LIST1, 25
1386446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(convolve1_\r)
1387446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r12,lr}
1388446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1389446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, r8
1390446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1391446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch    step=1, max_r=\r
1392446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1393446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mainloop    core=hconv1_\r, step=1, max_r=\r, labelc=.Lcnv1_\r, labelnc=.Lcnvnc1_\r
1394446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1395446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r12,pc}
1396446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(convolve1_\r)
1397446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endr
1398446788007efe0a673d0366284026adfa17b36fedSimon Hosie
/* Generate one convolve4_<r> function for each radius in TUNED_LIST4 (a
 * name defined outside this section) plus 25.  Unlike convolve1, each
 * function first carves an aligned scratch buffer out of the stack for the
 * window slots that prefetch stores through r9, and moves src (r1) back by
 * rlf * 4.
 */
1399446788007efe0a673d0366284026adfa17b36fedSimon Hosie.irep r, TUNED_LIST4, 25
1400446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(convolve4_\r)
            /* r12 = sp - 0x200 is kept so sp can be restored exactly on
             * exit.  Clearing bits 2..9 of it gives r9, which leaves at
             * least 0x200 bytes between r9 and the caller's sp.  sp is
             * moved down to r9, so the r12/lr pair is pushed below the
             * buffer.
             */
1401446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r12, sp, #0x200
1402446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bic         r9, r12, #0x3fc
1403446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mov         sp, r9
1404446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r12,lr}
1405446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1406446788007efe0a673d0366284026adfa17b36fedSimon Hosie            /* r9 now points to a buffer on the stack whose address has the low
1407446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * 10 bits clear.  This allows easy address calculation in the
1408446788007efe0a673d0366284026adfa17b36fedSimon Hosie             * wrap-around cases.
             *
             * (The bic above clears bits 2..9 only; bits 0..1 are zero
             * as long as sp was word-aligned on entry.)
1409446788007efe0a673d0366284026adfa17b36fedSimon Hosie             */
1410446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1411446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r1, r1, r8, LSL #2
1412446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1413446788007efe0a673d0366284026adfa17b36fedSimon Hosie            prefetch    step=4, max_r=\r
1414446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1415446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mainloop    core=hconv4_\r, step=4, max_r=\r, labelc=.Lcnv4_\r, labelnc=.Lcnvnc4_\r
1416446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Recover the saved r12 and rebuild the caller's sp from it. */
1417446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r12,lr}
1418446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         sp, r12, #0x200
1419446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bx          lr
1420446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(convolve4_\r)
1421446788007efe0a673d0366284026adfa17b36fedSimon Hosie.endr
1422446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1423446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* void rsdIntrinsicBlurU1_K(
1424446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  void *out,      // r0
1425446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  void *in,       // r1
1426446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t w,       // r2
1427446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t h,       // r3
1428446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t p,       // [sp]
1429446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t x,       // [sp,#4]
1430446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t y,       // [sp,#8]
1431446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t count,   // [sp,#12]
1432446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t r,       // [sp,#16]
1433446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  uint16_t *tab); // [sp,#20]
1434446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1435446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(rsdIntrinsicBlurU1_K)
            /* Entry point that unpacks its arguments into the registers used
             * by the convolve1_<r> routines and branches to the one selected
             * by the radius r.
             *
             * The two pushes below store r4-r12 and lr (10 words, 40 bytes)
             * and d8-d15 (64 bytes), so the stack arguments listed in the
             * prototype are found 104 bytes higher than at entry:
             * p at [sp,#104], x at #108, y at #112, count at #116,
             * r at #120 and tab at #124.
             */
1436446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1437446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vpush       {d8-d15}
1438446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r5, [sp,#120]    /* r5 = r */
1439446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r8, [sp,#108]    /* r8 = x */
1440446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r6, [sp,#112]    /* r6 = y */
1441446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r9, r2, r8    /* r9 = w - x */
1442446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r7, r3, r6    /* r7 = h - y */
1443446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r2, [sp,#104]    /* r2 = p (w no longer needed) */
1444446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r3, [sp,#116]    /* r3 = count (h no longer needed) */
1445446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r9, r9, r3    /* r9 = w - x - count */
1446446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r7, r7, #1    /* r7 = h - y - 1 */
1447446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1448446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [sp,#124]    /* r12 = tab */
1449446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* out += x and in += x, added as unscaled byte offsets. */
1450446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r0, r0, r8 @, LSL #2 /* for blur4 option */
1451446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r1, r1, r8 @, LSL #2 /* for blur4 option */
1452446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Limit r6, r7, r8 and r9 to at most r5 (= r).  The hi condition
             * is an unsigned comparison, so a value that wrapped below zero in
             * the subtractions above counts as large and is replaced by r5 too.
             */
1453446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r6, r5
1454446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r6, r5
1455446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r7, r5
1456446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r7, r5
1457446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r8, r5
1458446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r8, r5
1459446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r9, r5
1460446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r9, r5
1461446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* r4 = r8 + r9 + count, using the limited r8 and r9. */
1462446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r4, r8, r9
1463446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r4, r4, r3
1464446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Load the first 28 halfwords of tab into d0-d6 (16, then 12);
             * the writeback form leaves r12 pointing just past them.
             */
1465446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d0,d1,d2,d3}, [r12]!
1466446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d4,d5,d6}, [r12]!
1467446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* The branches below do not write lr, so lr is pointed at local
             * label 1 by hand; presumably the selected convolve routine
             * returns there.
             */
1468446788007efe0a673d0366284026adfa17b36fedSimon Hosie            adr         lr, 1f
            /* Compare r5 with each radius in TUNED_LIST1, in list order, and
             * branch to the first convolve1_<r> for which r5 is lower or the
             * same (unsigned).  If none is taken, fall through to the branch
             * to convolve1_25.
             * NOTE(review): picking the smallest sufficient routine assumes
             * TUNED_LIST1 is in ascending order -- confirm at its definition.
             */
1469446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .irep r, TUNED_LIST1
1470446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r5, #\r
1471446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bls         convolve1_\r
1472446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endr
1473446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           convolve1_25
1474446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Undo the two entry pushes; loading the saved lr into pc returns
             * to the caller.
             */
1475446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          vpop        {d8-d15}
1476446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
1477446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(rsdIntrinsicBlurU1_K)
1478446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1479446788007efe0a673d0366284026adfa17b36fedSimon Hosie/* void rsdIntrinsicBlurU4_K(
1480446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  void *out,      // r0
1481446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  void *in,       // r1
1482446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t w,       // r2
1483446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t h,       // r3
1484446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t p,       // [sp]
1485446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t x,       // [sp,#4]
1486446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t y,       // [sp,#8]
1487446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t count,   // [sp,#12]
1488446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  size_t r,       // [sp,#16]
1489446788007efe0a673d0366284026adfa17b36fedSimon Hosie *                  uint16_t *tab); // [sp,#20]
1490446788007efe0a673d0366284026adfa17b36fedSimon Hosie */
1491446788007efe0a673d0366284026adfa17b36fedSimon HosieENTRY(rsdIntrinsicBlurU4_K)
            /* Entry point that unpacks its arguments into the registers used
             * by the convolve4_<r> routines and branches to the one selected
             * by the radius r.  Compared with rsdIntrinsicBlurU1_K, the
             * x and count derived values are scaled by 4 here.
             *
             * The two pushes below store r4-r12 and lr (10 words, 40 bytes)
             * and d8-d15 (64 bytes), so the stack arguments listed in the
             * prototype are found 104 bytes higher than at entry:
             * p at [sp,#104], x at #108, y at #112, count at #116,
             * r at #120 and tab at #124.
             */
1492446788007efe0a673d0366284026adfa17b36fedSimon Hosie            push        {r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1493446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vpush       {d8-d15}
1494446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r5, [sp,#120]    /* r5 = r */
1495446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r8, [sp,#108]    /* r8 = x */
1496446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r6, [sp,#112]    /* r6 = y */
1497446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r9, r2, r8    /* r9 = w - x */
1498446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r7, r3, r6    /* r7 = h - y */
1499446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r2, [sp,#104]    /* r2 = p (w no longer needed) */
1500446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r3, [sp,#116]    /* r3 = count (h no longer needed) */
1501446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r9, r9, r3    /* r9 = w - x - count */
1502446788007efe0a673d0366284026adfa17b36fedSimon Hosie            sub         r7, r7, #1    /* r7 = h - y - 1 */
1503446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1504446788007efe0a673d0366284026adfa17b36fedSimon Hosie            ldr         r12, [sp,#124]    /* r12 = tab */
1505446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1506446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r0, r0, r8, LSL #2    /* out += x * 4 */
1507446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r1, r1, r8, LSL #2    /* in += x * 4 */
1508446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Limit r6, r7, r8 and r9 to at most r5 (= r).  The hi condition
             * is an unsigned comparison, so a value that wrapped below zero in
             * the subtractions above counts as large and is replaced by r5 too.
             */
1509446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r6, r5
1510446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r6, r5
1511446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r7, r5
1512446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r7, r5
1513446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r8, r5
1514446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r8, r5
1515446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r9, r5
1516446788007efe0a673d0366284026adfa17b36fedSimon Hosie            movhi       r9, r5
1517446788007efe0a673d0366284026adfa17b36fedSimon Hosie
1518446788007efe0a673d0366284026adfa17b36fedSimon Hosie            mov         r3, r3, LSL #2    /* r3 = count * 4 */
1519446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r4, r8, r9    /* r4 = limited r8 + limited r9 */
1520446788007efe0a673d0366284026adfa17b36fedSimon Hosie            add         r4, r3, r4, LSL #2    /* r4 = count*4 + (r8 + r9)*4 */
1521446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Load the first 28 halfwords of tab into d0-d6 (16, then 12);
             * the writeback form leaves r12 pointing just past them.
             */
1522446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d0,d1,d2,d3}, [r12]!
1523446788007efe0a673d0366284026adfa17b36fedSimon Hosie            vld1.u16    {d4,d5,d6}, [r12]!
1524446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* The branches below do not write lr, so lr is pointed at local
             * label 1 by hand; the convolve4 routines end with bx lr and so
             * come back there.
             */
1525446788007efe0a673d0366284026adfa17b36fedSimon Hosie            adr         lr, 1f
            /* Compare r5 with each radius in TUNED_LIST4, in list order, and
             * branch to the first convolve4_<r> for which r5 is lower or the
             * same (unsigned).  If none is taken, fall through to the branch
             * to convolve4_25.
             * NOTE(review): picking the smallest sufficient routine assumes
             * TUNED_LIST4 is in ascending order -- confirm at its definition.
             */
1526446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .irep r, TUNED_LIST4
1527446788007efe0a673d0366284026adfa17b36fedSimon Hosie            cmp         r5, #\r
1528446788007efe0a673d0366284026adfa17b36fedSimon Hosie            bls         convolve4_\r
1529446788007efe0a673d0366284026adfa17b36fedSimon Hosie  .endr
1530446788007efe0a673d0366284026adfa17b36fedSimon Hosie            b           convolve4_25
1531446788007efe0a673d0366284026adfa17b36fedSimon Hosie
            /* Undo the two entry pushes; loading the saved lr into pc returns
             * to the caller.
             */
1532446788007efe0a673d0366284026adfa17b36fedSimon Hosie1:          vpop        {d8-d15}
1533446788007efe0a673d0366284026adfa17b36fedSimon Hosie            pop         {r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
1534446788007efe0a673d0366284026adfa17b36fedSimon HosieEND(rsdIntrinsicBlurU4_K)
1535