@ Tremolo library
@-----------------------------------------------------------------------
@ Copyright (C) 2002-2009, Xiph.org Foundation
@ Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
@ All rights reserved.

@ Redistribution and use in source and binary forms, with or without
@ modification, are permitted provided that the following conditions
@ are met:

@     * Redistributions of source code must retain the above copyright
@ notice, this list of conditions and the following disclaimer.
@     * Redistributions in binary form must reproduce the above
@ copyright notice, this list of conditions and the following disclaimer
@ in the documentation and/or other materials provided with the
@ distribution.
@     * Neither the names of the Xiph.org Foundation nor Pinknoise
@ Productions Ltd nor the names of its contributors may be used to
@ endorse or promote products derived from this software without
@ specific prior written permission.
@
@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
@ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@ ----------------------------------------------------------------------

@ Section selection, exported entry points and external table references
@ for the low accuracy version of the MDCT routines.
    .text

	@ low accuracy version

	@ Entry points made visible to the linker.
	.global mdct_backwardARM
	.global mdct_shift_right
	.global mdct_unroll_prelap
	.global mdct_unroll_part2
	.global mdct_unroll_part3
	.global mdct_unroll_postlap

	@ Lookup tables defined in another object file.
	.extern	sincos_lookup0
	.extern	sincos_lookup1

@-----------------------------------------------------------------------
@ mdct_unroll_prelap
@
@ Reads 32-bit words backwards from r down to post.  Each word is
@ shifted right by 9 (arithmetic), saturated to the signed 16-bit range
@ and stored as a halfword at out; out then advances by step halfwords.
@ Words are handled four at a time, with a one-at-a-time tail loop.
@
@ In:    r0 = out   (halfword destination)
@        r1 = post  (lowest word address to read)
@        r2 = r     (one word past the first word to read)
@        r3 = step  (output stride in halfwords)
@ Out:   nothing returned in registers
@ Clobb: r0-r3, r12, r14, flags   (r4-r7 are saved and restored)
@-----------------------------------------------------------------------
	.global mdct_unroll_prelap
mdct_unroll_prelap:
	@ r0 = out
	@ r1 = post
	@ r2 = r
	@ r3 = step
	STMFD	r13!,{r4-r7,r14}
	MVN	r4, #0x8000		@ r4 = 0xFFFF7FFF (low half = +ve clamp 0x7FFF)
	MOV	r3, r3, LSL #1		@ r3 = step in bytes
	SUB	r1, r2, r1		@ r1 = r - post
	SUBS	r1, r1, #16		@ r1 = r - post - 16
	BLT	unroll_over		@ fewer than 4 words left: go to the tail
unroll_loop:
	LDMDB	r2!,{r5,r6,r7,r12}	@ r12=r[-1] r7=r[-2] r6=r[-3] r5=r[-4]; r -= 4

	MOV	r5, r5, ASR #9		@ r5 = (4th word back)>>9
	MOV	r6, r6, ASR #9		@ r6 = (3rd word back)>>9
	MOV	r7, r7, ASR #9		@ r7 = (2nd word back)>>9
	MOV	r12,r12,ASR #9		@ r12= (1st word back)>>9

	MOV	r14,r12,ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r12,r4, r14,ASR #31	@ else low half = 0x7FFF (+ve) or 0x8000 (-ve)
	STRH	r12,[r0], r3		@ *out = low half; out += step

	MOV	r14,r7, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r7, r4, r14,ASR #31
	STRH	r7, [r0], r3

	MOV	r14,r6, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r6, r4, r14,ASR #31
	STRH	r6, [r0], r3

	MOV	r14,r5, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r5, r4, r14,ASR #31
	STRH	r5, [r0], r3

	SUBS	r1, r1, #16		@ 16 bytes (4 words) consumed
	BGE	unroll_loop

unroll_over:
	ADDS	r1, r1, #16		@ r1 = bytes still to read (undo the -16 bias)
	BLE	unroll_end
unroll_loop2:
	LDR	r5,[r2,#-4]!		@ r5 = *--r
	@ stall
	@ stall (Xscale)
	MOV	r5, r5, ASR #9		@ r5 = (*--r)>>9
	MOV	r14,r5, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r5, r4, r14,ASR #31
	STRH	r5, [r0], r3
	SUBS	r1, r1, #4
	BGT	unroll_loop2
unroll_end:
	LDMFD	r13!,{r4-r7,PC}		@ restore r4-r7 and return

@-----------------------------------------------------------------------
@ mdct_unroll_postlap
@
@ Reads every second 32-bit word from l up to (but not including) post.
@ Each word is negated, shifted right by 9 (arithmetic), saturated to
@ the signed 16-bit range and stored as a halfword at out; out then
@ advances by step halfwords.  Four outputs are produced per pass of
@ the main loop, with a one-at-a-time tail loop.
@
@ In:    r0 = out   (halfword destination)
@        r1 = post  (end of the input walk)
@        r2 = l     (first word to read; advances 2 words per output)
@        r3 = step  (output stride in halfwords)
@ Out:   nothing returned in registers
@ Clobb: r0-r3, r12, r14, flags   (r4-r7 are saved and restored)
@-----------------------------------------------------------------------
	.global mdct_unroll_postlap
mdct_unroll_postlap:
	@ r0 = out
	@ r1 = post
	@ r2 = l
	@ r3 = step
	STMFD	r13!,{r4-r7,r14}
	MVN	r4, #0x8000		@ r4 = 0xFFFF7FFF (low half = +ve clamp 0x7FFF)
	MOV	r3, r3, LSL #1		@ r3 = step in bytes
	SUB	r1, r1, r2		@ r1 = post - l
	MOV	r1, r1, ASR #1		@ r1 = (post - l)>>1  (4 per output)
	SUBS	r1, r1, #16		@ r1 = ((post - l)>>1) - 16  (4 outputs)
	BLT	unroll_over3		@ fewer than 4 outputs left: go to the tail
unroll_loop3:
	LDR	r12,[r2],#8		@ r12= *l; l += 2
	LDR	r7, [r2],#8		@ r7 = *l; l += 2
	LDR	r6, [r2],#8		@ r6 = *l; l += 2
	LDR	r5, [r2],#8		@ r5 = *l; l += 2

	RSB	r12,r12,#0		@ negate all four inputs
	RSB	r5, r5, #0
	RSB	r6, r6, #0
	RSB	r7, r7, #0

	MOV	r12, r12,ASR #9		@ r12= (-*l)>>9
	MOV	r5,  r5, ASR #9		@ r5 = (-*l)>>9
	MOV	r6,  r6, ASR #9		@ r6 = (-*l)>>9
	MOV	r7,  r7, ASR #9		@ r7 = (-*l)>>9

	MOV	r14,r12,ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r12,r4, r14,ASR #31	@ else low half = 0x7FFF (+ve) or 0x8000 (-ve)
	STRH	r12,[r0], r3		@ *out = low half; out += step

	MOV	r14,r7, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r7, r4, r14,ASR #31
	STRH	r7, [r0], r3

	MOV	r14,r6, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r6, r4, r14,ASR #31
	STRH	r6, [r0], r3

	MOV	r14,r5, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r5, r4, r14,ASR #31
	STRH	r5, [r0], r3

	SUBS	r1, r1, #16		@ 4 outputs produced
	BGE	unroll_loop3

unroll_over3:
	ADDS	r1, r1, #16		@ r1 = 4 * outputs still to produce
	BLE	unroll_over4
unroll_loop4:
	LDR	r5,[r2], #8		@ r5 = *l; l += 2
	@ stall
	@ stall (Xscale)
	RSB	r5, r5, #0
	MOV	r5, r5, ASR #9		@ r5 = (-*l)>>9
	MOV	r14,r5, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r5, r4, r14,ASR #31
	STRH	r5, [r0], r3
	SUBS	r1, r1, #4
	BGT	unroll_loop4
unroll_over4:
	LDMFD	r13!,{r4-r7,PC}		@ restore r4-r7 and return

@-----------------------------------------------------------------------
@ mdct_unroll_part2
@
@ While r > post, produces one halfword per iteration:
@     l -= 2
@     v = ((*l >> 8) * *wL++  +  (*--r >> 8) * *--wR) >> 9
@     *out = v saturated to signed 16 bits;  out += step
@ l and r address 32-bit words; wL and wR address bytes (read with LDRB).
@
@ In:    r0 = out   (halfword destination)
@        r1 = post  (lower bound for r)
@        r2 = l
@        r3 = r
@        [sp]    = step (output stride in halfwords)
@        [sp,#4] = wL
@        [sp,#8] = wR
@ Out:   nothing returned in registers
@ Clobb: r0-r3, r12, r14, flags   (r4, r6-r11 are saved and restored)
@-----------------------------------------------------------------------
	.global mdct_unroll_part2
mdct_unroll_part2:
	@ r0 = out
	@ r1 = post
	@ r2 = l
	@ r3 = r
	@ <> = step
	@ <> = wL
	@ <> = wR
	MOV	r12,r13			@ r12 = address of the stacked arguments
	STMFD	r13!,{r4,r6-r11,r14}
	LDMFD	r12,{r8,r9,r10}		@ r8 = step
					@ r9 = wL
					@ r10= wR
	MVN	r4, #0x8000		@ r4 = 0xFFFF7FFF (low half = +ve clamp 0x7FFF)
	MOV	r8, r8, LSL #1		@ r8 = step in bytes
	SUBS	r1, r3, r1		@ r1 = (r - post)
	BLE	unroll_over5		@ nothing to do
unroll_loop5:
	LDR	r12,[r2, #-8]!		@ r12= *l       (but l -= 2 first)
	LDR	r7, [r3, #-4]!		@ r7 = *--r
	LDRB	r6, [r10,#-1]!		@ r6 = *--wR
	LDRB	r11,[r9],#1		@ r11= *wL++

	MOV	r12, r12, ASR #8
	@ Can save a cycle here, at the cost of 1bit errors in rounding
	MUL	r11,r12,r11		@ r11  = *l   * *wL++
	MOV	r7, r7, ASR #8
	MLA	r6, r7, r6, r11		@ r6   = *--r * *--wR + r11
	MOV	r6, r6, ASR #9
	MOV	r14,r6, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r6, r4, r14,ASR #31	@ else low half = 0x7FFF (+ve) or 0x8000 (-ve)
	STRH	r6, [r0], r8		@ *out = low half; out += step

	SUBS	r1, r1, #4		@ one word of r consumed
	BGT	unroll_loop5

unroll_over5:
	LDMFD	r13!,{r4,r6-r11,PC}	@ restore saved registers and return

@-----------------------------------------------------------------------
@ mdct_unroll_part3
@
@ While r < post, produces one halfword per iteration:
@     v = ((*r++ >> 8) * *--wR  -  (*l >> 8) * *wL++) >> 9
@     l += 2
@     *out = v saturated to signed 16 bits;  out += step
@ l and r address 32-bit words; wL and wR address bytes (read with LDRB).
@
@ In:    r0 = out   (halfword destination)
@        r1 = post  (upper bound for r)
@        r2 = l
@        r3 = r
@        [sp]    = step (output stride in halfwords)
@        [sp,#4] = wL
@        [sp,#8] = wR
@ Out:   nothing returned in registers
@ Clobb: r0-r3, r12, r14, flags   (r4, r6-r11 are saved and restored)
@-----------------------------------------------------------------------
	.global mdct_unroll_part3
mdct_unroll_part3:
	@ r0 = out
	@ r1 = post
	@ r2 = l
	@ r3 = r
	@ <> = step
	@ <> = wL
	@ <> = wR
	MOV	r12,r13			@ r12 = address of the stacked arguments
	STMFD	r13!,{r4,r6-r11,r14}
	LDMFD	r12,{r8,r9,r10}		@ r8 = step
					@ r9 = wL
					@ r10= wR
	MVN	r4, #0x8000		@ r4 = 0xFFFF7FFF (low half = +ve clamp 0x7FFF)
	MOV	r8, r8, LSL #1		@ r8 = step in bytes
	SUBS	r1, r1, r3		@ r1 = (post - r)
	BLE	unroll_over6		@ nothing to do
unroll_loop6:
	LDR	r12,[r2],#8		@ r12= *l       (then l += 2)
	LDR	r7, [r3],#4		@ r7 = *r++
	LDRB	r11,[r9],#1		@ r11= *wL++
	LDRB	r6, [r10,#-1]!		@ r6 = *--wR

	@ Can save a cycle here, at the cost of 1bit errors in rounding
	MOV	r12,r12,ASR #8
	MUL	r11,r12,r11		@ r11  = *l   * *wL++
	MOV	r7, r7, ASR #8
	MUL	r6, r7, r6		@ r6   = *r++ * *--wR
	SUB	r6, r6, r11		@ r6  -= *l   * *wL++
	MOV	r6, r6, ASR #9
	MOV	r14,r6, ASR #15
	TEQ	r14,r14,ASR #31		@ if r14==0 || r14==-1 then in range
	EORNE	r6, r4, r14,ASR #31	@ else low half = 0x7FFF (+ve) or 0x8000 (-ve)
	STRH	r6, [r0], r8		@ *out = low half; out += step

	SUBS	r1, r1, #4		@ one word of r consumed
	BGT	unroll_loop6

unroll_over6:
	LDMFD	r13!,{r4,r6-r11,PC}	@ restore saved registers and return

@-----------------------------------------------------------------------
@ mdct_shift_right
@
@ Copies every second 32-bit word of in, starting at in[1], into
@ consecutive words of right.  n>>2 words are written in total:
@     right[i] = in[1 + 2*i]      for i = 0 .. (n>>2)-1
@ Eight words are moved per pass of the main loop, with a
@ one-at-a-time tail loop.
@
@ In:    r0 = n
@        r1 = in
@        r2 = right
@ Out:   nothing returned in registers
@ Clobb: r0-r3, r12, r14, flags   (r4-r11 are saved and restored)
@-----------------------------------------------------------------------
	.global mdct_shift_right
mdct_shift_right:
	@ r0 = n
	@ r1 = in
	@ r2 = right
	STMFD	r13!,{r4-r11,r14}

	MOV	r0, r0, LSR #2		@ n >>= 2
	ADD	r1, r1, #4		@ in += 1 word

	SUBS	r0, r0,	#8		@ bias the count by 8 for the unrolled loop
	BLT	sr_less_than_8
sr_loop:
	LDR	r3, [r1], #8		@ read one word, skip the next
	LDR	r4, [r1], #8
	LDR	r5, [r1], #8
	LDR	r6, [r1], #8
	LDR	r7, [r1], #8
	LDR	r8, [r1], #8
	LDR	r12,[r1], #8
	LDR	r14,[r1], #8
	SUBS	r0, r0, #8		@ flags set here survive the STMIA below
	STMIA	r2!,{r3,r4,r5,r6,r7,r8,r12,r14}
	BGE	sr_loop
sr_less_than_8:
	ADDS	r0, r0, #8		@ r0 = words still to copy (undo the bias)
	BEQ	sr_end
sr_loop2:
	LDR	r3, [r1], #8
	SUBS	r0, r0, #1		@ flags set here survive the STR below
	STR	r3, [r2], #4
	BGT	sr_loop2
sr_end:
	LDMFD	r13!,{r4-r11,PC}	@ restore saved registers and return

2927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_backwardARM:
2937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = n
2947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = in
2957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMFD	r13!,{r4-r11,r14}
2967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
2977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, #1<<4	@ r2 = 1<<shift
2987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r3, #13-4	@ r3 = 13-shift
2997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangfind_shift_loop:
3007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	TST	r0, r2		@ if (n & (1<<shift)) == 0
3017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, r2, LSL #1
3027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUBEQ	r3, r3, #1	@ shift--
3037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BEQ	find_shift_loop
3047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, #2
3057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, r2, LSL r3	@ r2 = step = 2<<shift
3067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ presymmetry
3087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = n (a multiple of 4)
3097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = in
3107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = step
3117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 = shift
3127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r4, r1, r0, LSL #1	@ r4 = aX = in+(n>>1)
3147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r14,r1, r0		@ r14= in+(n>>2)
3157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r4, r4, #3*4		@ r4 = aX = in+n2-3
3167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r5, =sincos_lookup0	@ r5 = T=sincos_lookup0
3177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangpresymmetry_loop1:
3197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r7, [r4,#8]		@ r6 = s2 = aX[2]
3207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r11,[r5,#1]		@ r11= T[1]
3217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r6, [r4],#-16		@ r6 = s0 = aX[0]
3227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r10,[r5],r2		@ r10= T[0]   T += step
3237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6, r6, ASR #8
3247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r7, r7, ASR #8
3257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XPROD31(s0, s2, T[0], T[1], 0xaX[0], &ax[2])
3277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r9, r6, r10		@ r9   = s0*T[0]
3287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r6, r6, #0
3297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r9, r7, r11,r9		@ r9  += s2*T[1]
3307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r4, r14
3317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r7, r10		@ r12  = s2*T[0]
3327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r9, [r4,#16]		@ aX[0] = r9
3337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r12,r6, r11,r12		@ r12 -= s0*T[1]
3347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r12,[r4,#8+16]		@ aX[2] = r12
3357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BGE	presymmetry_loop1	@ while (aX >= in+n4)
3377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangpresymmetry_loop2:
3397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r6, [r4],#-16		@ r6 = s0 = aX[0]
3407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r10,[r5,#1]		@ r10= T[1]
3417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r7, [r4,#16+8]		@ r6 = s2 = aX[2]
3427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r11,[r5],-r2		@ r11= T[0]   T -= step
3437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6, r6, ASR #8
3447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r7, r7, ASR #8
3457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XPROD31(s0, s2, T[1], T[0], 0xaX[0], &ax[2])
3477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r9, r6, r10		@ r9   = s0*T[1]
3487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r6, r6, #0
3497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r9, r7, r11,r9		@ r9  += s2*T[0]
3507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r4, r1
3517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r7, r10		@ r12  = s2*T[1]
3527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r9, [r4,#16]		@ aX[0] = r9
3537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r12,r6, r11,r12		@ r12 -= s0*T[0]
3547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r12,[r4,#8+16]		@ aX[2] = r12
3557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BGE	presymmetry_loop2	@ while (aX >= in)
3577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = n
3597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = in
3607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = step
3617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 = shift
3627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMFD	r13!,{r3}
3637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r5, =sincos_lookup0	@ r5 = T=sincos_lookup0
3647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r4, r1, r0, LSL #1	@ r4 = aX = in+(n>>1)
3657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r4, r4, #4*4		@ r4 = aX = in+(n>>1)-4
3667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r11,[r5,#1]		@ r11= T[1]
3677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r10,[r5],r2		@ r10= T[0]    T += step
3687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangpresymmetry_loop3:
3697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r8, [r1],#16 		@ r8 = ro0 = bX[0]
3707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r9, [r1,#8-16]		@ r9 = ro2 = bX[2]
3717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r6, [r4],#-16		@ r6 = ri0 = aX[0]
3727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r7, [r4,#8+16]		@ r7 = ri2 = aX[2]
3737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r8, r8, ASR #8
3747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r9, ASR #8
3757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6, r6, ASR #8
3767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XNPROD31( ro2, ro0, T[1], T[0], 0xaX[0], &aX[2] )
3787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ aX[0] = (ro2*T[1] - ro0*T[0])>>31 aX[2] = (ro0*T[1] + ro2*T[0])>>31
3797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r8, r11		@ r12  = ro0*T[1]
3807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r7, r7, ASR #8
3817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r12,r9, r10,r12		@ r12 += ro2*T[0]
3827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r8, r8, #0		@ r8 = -ro0
3837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r3, r9, r11		@ r3   = ro2*T[1]
3847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r11,[r5,#1]		@ r11= T[1]
3857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r3, r8, r10,r3		@ r3  -= ro0*T[0]
3867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r10,[r5],r2		@ r10= T[0]    T += step
3877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r12,[r4,#16+8]
3887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r3, [r4,#16]
3897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
3907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XNPROD31( ri2, ri0, T[0], T[1], 0xbX[0], &bX[2] )
3917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ bX[0] = (ri2*T[0] - ri0*T[1])>>31 bX[2] = (ri0*T[0] + ri2*T[1])>>31
3927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r6, r10		@ r12  = ri0*T[0]
3937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r6, r6, #0		@ r6 = -ri0
3947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r12,r7, r11,r12		@ r12 += ri2*T[1]
3957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r4, r1
3967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r3, r7, r10		@ r3   = ri2*T[0]
3977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r12,[r1,#8-16]
3987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r3, r6, r11,r3		@ r3  -= ri0*T[1]
3997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r3, [r1,#-16]
4007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BGE	presymmetry_loop3
4027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r1,r1,r0		@ r1 = in -= n>>2 (i.e. restore in)
4047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r3,[r13]
4067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r2,[r13,#-4]!
4077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterflies
4097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = n  = (points * 2)
4107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = in = x
4117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = i
4127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 = shift
4137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMFD	r13!,{r0-r1}
4147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSBS	r4,r3,#6		@ r4 = stages = 7-shift then --stages
4157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r5,=sincos_lookup0
4167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BLE	no_generics
4177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r14,#4			@ r14= 4               (i=0)
4187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6, r14,LSL r3		@ r6 = (4<<i)<<shift
4197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_butterflies_loop1:
4207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r0, r0, LSR #1		@ r0 = points>>i = POINTS
4217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, r14,LSR #2		@ r2 = (1<<i)-j        (j=0)
4227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMFD	r13!,{r4,r14}
4237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_butterflies_loop2:
4247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly_generic(x+POINTS*j, POINTS, 4<<(i+shift))
4267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly_generic(r1, r0, r6)
4277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = points
4287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = x
4297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ preserve r2 (external loop counter)
4307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ preserve r3
4317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ preserve r4 (external loop counter)
4327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r5 = T = sincos_lookup0
4337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r6 = step
4347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ preserve r14
4357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r2,[r13,#-4]!		@ stack r2
4377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r1,r1,r0,LSL #1		@ r1 = x2+4 = x + (POINTS>>1)
4387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r7,r1,r0,LSL #1		@ r7 = x1+4 = x + POINTS
4397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r5,#1024		@ r12= sincos_lookup0+1024
4407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_bufferfly_generic_loop1:
	@ Ascending-T pass of the generic butterfly.  Each iteration:
	@   - loads the four words just below x1 (r7) and the four words just
	@     below x2 (r1), moving both pointers down by four words,
	@   - writes the pairwise sums back into the x1 quad,
	@   - scales the pairwise differences s0..s3 down with ASR #8,
	@     multiplies them by the byte-sized T[0]/T[1] entries (LDRB) and
	@     writes the rotated results into the x2 quad,
	@   - advances T (r5) by step (r6) bytes.
	@ The loop repeats while r5 < r12 (the limit), tested by the BLT at
	@ the bottom.
	@ NOTE: the ">>31" in the quoted XPROD31 formulas is not performed in
	@ this loop; no shift is applied to the products, the inputs are
	@ pre-shifted by ASR #8 instead.
4427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMDB	r7!,{r2,r3,r8,r11}	@ r2 = x1[0]
4437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r3 = x1[1]
4447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x1[2]
4457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x1[3]    x1 -= 4
4467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMDB	r1!,{r4,r9,r10,r14}	@ r4 = x2[0]
4477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x2[1]
4487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x2[2]
4497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r14= x2[3]    x2 -= 4
4507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
	@ Each SUB/ADD pair yields the difference d = a - b and then the
	@ sum as d + 2*b, so no extra register is needed for the sum.
4517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r2, r2, r3		@ r2 = s0 = x1[0] - x1[1]
4527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r3, r2, r3, LSL #1	@ r3 =      x1[0] + x1[1] (-> x1[0])
4537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r11,r8		@ r11= s1 = x1[3] - x1[2]
4547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r11,r8, LSL #1	@ r8 =      x1[3] + x1[2] (-> x1[2])
4557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r9, r4		@ r9 = s2 = x2[1] - x2[0]
4567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r4, r9, r4, LSL #1	@ r4 =      x2[1] + x2[0] (-> x1[1])
4577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r14,r10		@ r14= s3 = x2[3] - x2[2]
4587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r14,r10,LSL #1	@ r10=      x2[3] + x2[2] (-> x1[3])
4597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r7,{r3,r4,r8,r10}	@ x1[0..3] = the four sums (no writeback)
4607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = points
4627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = x2
4637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = s0
4647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 free
4657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r4 free
4667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r5 = T
4677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r6 = step
4687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r7 = x1
4697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r8 free
4707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r9 = s2
4717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r10 free
4727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r11= s1
4737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r12= limit
4747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r14= s3
4757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r8, [r5,#1]		@ r8 = T[1]
4777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r10,[r5],r6		@ r10= T[0]		T += step
4787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, r2, ASR #8		@ s0 >>= 8 (arithmetic) before multiplying
4797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r11,r11,ASR #8		@ s1 >>= 8
4807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r9, ASR #8		@ s2 >>= 8
4817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r14,r14,ASR #8		@ s3 >>= 8
4827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XPROD31(s1, s0, T[0], T[1], &x2[0], &x2[2])
4847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x2[0] = (s1*T[0] + s0*T[1])>>31     x2[2] = (s0*T[0] - s1*T[1])>>31
4857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ stall Xscale
4867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r3, r2, r8		@ r3   = s0*T[1]
4877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r3, r11,r10,r3		@ r3  += s1*T[0]
4887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r11,r11,#0		@ r11 = -s1, so the MUL below yields -s1*T[1]
4897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r4, r8, r11		@ r4   = -s1*T[1]
4907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r4, r2, r10,r4		@ r4  += s0*T[0] = Value for x2[2]
4917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, r3			@ r2 = r3 = Value for x2[0]
4927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
4937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XPROD31(s2, s3, T[0], T[1], &x2[1], &x2[3])
4947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x2[1] = (s2*T[0] + s3*T[1])>>31     x2[3] = (s3*T[0] - s2*T[1])>>31
4957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r3, r9, r10		@ r3   = s2*T[0]
4967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r3, r14,r8, r3		@ r3  += s3*T[1] = Value for x2[1]
4977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r9, r9, #0		@ r9 = -s2, so the MLA below subtracts s2*T[1]
4987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r11,r14,r10		@ r11  = s3*T[0]
4997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r11,r9, r8, r11		@ r11 -= s2*T[1] = Value for x2[3]
5007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r5, r12			@ T vs limit; flags are consumed by the BLT below
5017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1,{r2,r3,r4,r11}	@ x2[0..3] = r2,r3,r4,r11 (STMIA leaves flags alone)
5037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BLT	mdct_bufferfly_generic_loop1
5057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,#1024
5077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_bufferfly_generic_loop2:
	@ Descending-T pass of the generic butterfly.  Same shape as the
	@ ascending pass, with these differences visible in the code:
	@   - T (r5) moves down by step (r6) bytes per iteration,
	@   - s1 and s2 are taken with the opposite sign
	@     (s1 = x1[2]-x1[3], s2 = x2[0]-x2[1]),
	@   - the rotations follow the XNPROD31 formulas quoted below,
	@   - the loop repeats while r5 > r12 (the limit), tested by the BGT
	@     at the bottom.
	@ NOTE: the ">>31" in the quoted XNPROD31 formulas is not performed in
	@ this loop; no shift is applied to the products, the inputs are
	@ pre-shifted by ASR #8 instead.
5087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMDB	r7!,{r2,r3,r9,r10}	@ r2 = x1[0]
5097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r3 = x1[1]
5107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x1[2]
5117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x1[3]    x1 -= 4
5127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMDB	r1!,{r4,r8,r11,r14}	@ r4 = x2[0]
5137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x2[1]
5147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x2[2]
5157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r14= x2[3]    x2 -= 4
5167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
	@ Each SUB/ADD pair yields the difference d = a - b and then the
	@ sum as d + 2*b, so no extra register is needed for the sum.
5177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r2, r2, r3		@ r2 = s0 = x1[0] - x1[1]
5187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r3, r2, r3, LSL #1	@ r3 =      x1[0] + x1[1] (-> x1[0])
5197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r9,r10		@ r9 = s1 = x1[2] - x1[3]
5207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r9,r10, LSL #1	@ r10=      x1[2] + x1[3] (-> x1[2])
5217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r4, r4, r8		@ r4 = s2 = x2[0] - x2[1]
5227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r4, r8, LSL #1	@ r8 =      x2[0] + x2[1] (-> x1[1])
5237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r14,r11		@ r14= s3 = x2[3] - x2[2]
5247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r14,r11,LSL #1	@ r11=      x2[3] + x2[2] (-> x1[3])
5257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r7,{r3,r8,r10,r11}	@ x1[0..3] = the four sums (no writeback)
5267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = points
5287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = x2
5297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = s0
5307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 free
5317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r4 = s2
5327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r5 = T
5337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r6 = step
5347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r7 = x1
5357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r8 free
5367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r9 = s1
5377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r10 free
5387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r11 free
5397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r12= limit
5407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r14= s3
5417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r8, [r5,#1]		@ r8 = T[1]
5437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r10,[r5],-r6		@ r10= T[0]		T -= step
5447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, r2, ASR #8		@ s0 >>= 8 (arithmetic) before multiplying
5457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r9, ASR #8		@ s1 >>= 8
5467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r4, r4, ASR #8		@ s2 >>= 8
5477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r14,r14,ASR #8		@ s3 >>= 8
5487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XNPROD31(s0, s1, T[0], T[1], &x2[0], &x2[2])
5507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x2[0] = (s0*T[0] - s1*T[1])>>31     x2[2] = (s1*T[0] + s0*T[1])>>31
5517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ stall Xscale
5527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r11,r2, r8		@ r11  = s0*T[1]
5537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r11,r9, r10,r11		@ r11 += s1*T[0]
5547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r9, r9, #0		@ r9 = -s1, so the MLA below subtracts s1*T[1]
5557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r2, r10,r2		@ r2   = s0*T[0]
5567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r2, r9, r8, r2		@ r2  += -s1*T[1] = Value for x2[0]
5577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r11			@ r9 = r11 = Value for x2[2]
5587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XNPROD31(s3, s2, T[0], T[1], &x2[1], &x2[3])
5607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x2[1] = (s3*T[0] - s2*T[1])>>31     x2[3] = (s2*T[0] + s3*T[1])>>31
5617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r11,r4, r10		@ r11   = s2*T[0]
5627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r11,r14,r8, r11		@ r11  += s3*T[1] = Value for x2[3]
5637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r4, r4, #0		@ r4 = -s2, so the MLA below subtracts s2*T[1]
5647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r3, r14,r10		@ r3   = s3*T[0]
5657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r3, r4, r8, r3		@ r3  -= s2*T[1] = Value for x2[1]
5667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r5, r12			@ T vs limit; flags are consumed by the BGT below
5677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1,{r2,r3,r9,r11}	@ x2[0..3] = r2,r3,r9,r11 (STMIA leaves flags alone)
5697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BGT	mdct_bufferfly_generic_loop2
5717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r2,[r13],#4		@ unstack r2
5737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r1, r1, r0, LSL #2	@ r1 = x+POINTS*j
5747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ stall Xscale
5757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUBS	r2, r2, #1		@ r2--                 (j++)
5767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BGT	mdct_butterflies_loop2
5777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMFD	r13!,{r4,r14}
5797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r1,[r13,#4]
5817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUBS	r4, r4, #1		@ stages--
5837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r14,r14,LSL #1		@ r14= 4<<i            (i++)
5847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6, r6, LSL #1		@ r6 = step <<= 1      (i++)
5857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BGE	mdct_butterflies_loop1
5867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMFD	r13,{r0-r1}
5877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangno_generics:
5897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterflies part2 (loop around mdct_butterfly_32)
5907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = points
5917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = in
5927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = step
5937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 = shift
5947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_bufferflies_loop3:
5967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly_32
5977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
5987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ block1
5997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r4, r1, #16*4		@ r4 = &in[16]
6007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r4,{r5,r6,r9,r10}	@ r5 = x[16]
6017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[17]
6027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[18]
6037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[19]
6047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r7,r8,r11,r12}	@ r7 = x[0]
6057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[1]
6067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[2]
6077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[3]
6087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r6		@ r5 = s0 = x[16] - x[17]
6097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r5, r6, LSL #1	@ r6 =      x[16] + x[17]  -> x[16]
6107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r9, r10		@ r9 = s1 = x[18] - x[19]
6117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r9, r10,LSL #1	@ r10=      x[18] + x[19]  -> x[18]
6127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r8, r8, r7		@ r8 = s2 = x[ 1] - x[ 0]
6137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r7, r8, r7, LSL #1	@ r7 =      x[ 1] + x[ 0]  -> x[17]
6147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,r11		@ r12= s3 = x[ 3] - x[ 2]
6157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r12,r11, LSL #1	@ r11=      x[ 3] + x[ 2]  -> x[19]
6167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r4!,{r6,r7,r10,r11}
6177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6,#0xed		@ r6 =cPI1_8
6197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r7,#0x62		@ r7 =cPI3_8
6207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r5, r5, ASR #8
6227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r9, ASR #8
6237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r8, r8, ASR #8
6247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r12,r12,ASR #8
6257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XNPROD31( s0, s1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] )
6277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x[0] = s0*cPI3_8 - s1*cPI1_8     x[2] = s1*cPI3_8 + s0*cPI1_8
6287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ stall Xscale
6297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r11,r5, r6		@ r11  = s0*cPI1_8
6307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r11,r9, r7, r11		@ r11 += s1*cPI3_8
6317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r9, r9, #0
6327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r5, r7, r5		@ r5   = s0*cPI3_8
6337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r5, r9, r6, r5		@ r5  -= s1*cPI1_8
6347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XPROD31 ( s2, s3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] )
6367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x[1] = s2*cPI1_8 + s3*cPI3_8     x[3] = s3*cPI1_8 - s2*cPI3_8
6377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r9, r8, r6		@ r9   = s2*cPI1_8
6387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r9, r12,r7, r9		@ r9  += s3*cPI3_8
6397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r8,r8,#0
6407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r6, r12		@ r12  = s3*cPI1_8
6417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r12,r8, r7, r12		@ r12 -= s2*cPI3_8
6427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1!,{r5,r9,r11,r12}
6437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ block2
6457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r4,{r5,r6,r9,r10}	@ r5 = x[20]
6467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[21]
6477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[22]
6487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[23]
6497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r7,r8,r11,r12}	@ r7 = x[4]
6507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[5]
6517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[6]
6527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[7]
6537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r6		@ r5 = s0 = x[20] - x[21]
6547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r5, r6, LSL #1	@ r6 =      x[20] + x[21]  -> x[20]
6557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r9, r10		@ r9 = s1 = x[22] - x[23]
6567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r9, r10,LSL #1	@ r10=      x[22] + x[23]  -> x[22]
6577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r8, r8, r7		@ r8 = s2 = x[ 5] - x[ 4]
6587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r7, r8, r7, LSL #1	@ r7 =      x[ 5] + x[ 4]  -> x[21]
6597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,r11		@ r12= s3 = x[ 7] - x[ 6]
6607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r12,r11, LSL #1	@ r11=      x[ 7] + x[ 6]  -> x[23]
6617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r14,#0xb5		@ cPI2_8
6627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r4!,{r6,r7,r10,r11}
6637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r9		@ r5 = s0 - s1
6657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r5, r9, LSL #1	@ r9 = s0 + s1
6667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r5, r5, ASR #8
6677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r5, r14,r5		@ r5 = (s0-s1)*cPI2_8
6687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,r8		@ r12= s3 - s2
6697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r12,r8, LSL #1	@ r8 = s3 + s2
6707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r8, r8, ASR #8
6727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r8, r14,r8		@ r8  = (s3+s2)*cPI2_8
6737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r9, ASR #8
6747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r9, r14,r9		@ r9  = (s0+s1)*cPI2_8
6757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r12,r12,ASR #8
6767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r14,r12		@ r12 = (s3-s2)*cPI2_8
6777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1!,{r5,r8,r9,r12}
6787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ block3: butterfly x[24..27] (via r4) against x[8..11] (via r1).
	@ The pairwise sums are stored to x[24..27]; the differences s0..s3
	@ are shifted down with ASR #8, rotated by cPI1_8/cPI3_8 and stored
	@ to x[8..11].  Both pointers advance by four words via the STMIA!
	@ writebacks.
6807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r4,{r5,r6,r9,r10}	@ r5 = x[24]
6817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[25]
6827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[26]
6837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[27]
6847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r7,r8,r11,r12}	@ r7 = x[8]
6857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[9]
6867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[10]
6877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[11]
6887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r6		@ r5 = s0 = x[24] - x[25]
6897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r5, r6, LSL #1	@ r6 =      x[24] + x[25]  -> x[24]
6907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r9, r10		@ r9 = s1 = x[26] - x[27]
6917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r9, r10,LSL #1	@ r10=      x[26] + x[27]  -> x[26]
6927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r8, r8, r7		@ r8 = s2 = x[ 9] - x[ 8]
6937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r7, r8, r7, LSL #1	@ r7 =      x[ 9] + x[ 8]  -> x[25]
6947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,r11		@ r12= s3 = x[11] - x[10]
6957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r12,r11, LSL #1	@ r11=      x[11] + x[10]  -> x[27]
6967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r4!,{r6,r7,r10,r11}	@ x[24..27] = r6,r7,r10,r11; r4 += 4 words
6977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
6987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6,#0x62		@ r6 = cPI3_8
6997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r7,#0xED		@ r7 = cPI1_8
7007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
7017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XNPROD31( s0, s1, cPI1_8, cPI3_8, &x[ 8], &x[10] )
7027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x[8] = s0*cPI1_8 - s1*cPI3_8     x[10] = s1*cPI1_8 + s0*cPI3_8
7037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ stall Xscale
7047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r5, r5, ASR #8		@ s0 >>= 8 (arithmetic) before multiplying
7057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r11,r5, r6		@ r11  = s0*cPI3_8
7067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r9, ASR #8		@ s1 >>= 8
7077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r11,r9, r7, r11		@ r11 += s1*cPI1_8   (value for x[10])
7087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r9, r9, #0		@ r9 = -s1, so the MLA below subtracts
7097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r5, r7, r5		@ r5   = s0*cPI1_8
7107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r5, r9, r6, r5		@ r5  -= s1*cPI3_8   (value for x[8])
7117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
7127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XPROD31 ( s2, s3, cPI3_8, cPI1_8, &x[ 9], &x[11] )
7137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x[9] = s2*cPI3_8 + s3*cPI1_8     x[11] = s3*cPI3_8 - s2*cPI1_8
7147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r8, r8, ASR #8		@ s2 >>= 8
7157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r9, r8, r6		@ r9   = s2*cPI3_8
7167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r12,r12,ASR #8		@ s3 >>= 8
7177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r9, r12,r7, r9		@ r9  += s3*cPI1_8   (value for x[9])
7187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r8,r8,#0		@ r8 = -s2, so the MLA below subtracts
7197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r6, r12		@ r12  = s3*cPI3_8
7207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r12,r8, r7, r12		@ r12 -= s2*cPI1_8   (value for x[11])
7217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1!,{r5,r9,r11,r12}	@ x[8..11] = r5,r9,r11,r12; r1 += 4 words
7227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
7237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ block4
7247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r4,{r5,r6,r10,r11}	@ r5 = x[28]
7257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[29]
7267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[30]
7277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[31]
7287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r8,r9,r12,r14}	@ r8 = x[12]
7297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[13]
7307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[14]
7317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r14= x[15]
7327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r6		@ r5 = s0 = x[28] - x[29]
7337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r5, r6, LSL #1	@ r6 =      x[28] + x[29]  -> x[28]
7347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r7, r14,r12		@ r7 = s3 = x[15] - x[14]
7357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r7, r12, LSL #1	@ r12=      x[15] + x[14]  -> x[31]
7367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r10,r10,r11		@ r10= s1 = x[30] - x[31]
7377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r10,r11,LSL #1	@ r11=      x[30] + x[31]  -> x[30]
7387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14, r8, r9		@ r14= s2 = x[12] - x[13]
7397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r14, r9, LSL #1	@ r9 =      x[12] + x[13]  -> x[29]
7407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r4!,{r6,r9,r11,r12}
7417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1!,{r5,r7,r10,r14}
7427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
7437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly16 (1st version)
7447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ block 1
7457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r1,r1,#16*4
7467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r4,r1,#8*4
7477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r4,{r5,r6,r9,r10}	@ r5 = x[ 8]
7487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[ 9]
7497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[10]
7507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[11]
7517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r7,r8,r11,r12}	@ r7 = x[0]
7527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[1]
7537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[2]
7547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[3]
7557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r6		@ r5 = s0 = x[ 8] - x[ 9]
7567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r5, r6, LSL #1	@ r6 =      x[ 8] + x[ 9]  -> x[ 8]
7577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r9, r10		@ r9 = s1 = x[10] - x[11]
7587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r9, r10,LSL #1	@ r10=      x[10] + x[11]  -> x[10]
7597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r8, r8, r7		@ r8 = s2 = x[ 1] - x[ 0]
7607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r7, r8, r7, LSL #1	@ r7 =      x[ 1] + x[ 0]  -> x[ 9]
7617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,r11		@ r12= s3 = x[ 3] - x[ 2]
7627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r12,r11, LSL #1	@ r11=      x[ 3] + x[ 2]  -> x[11]
7637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r14,#0xB5		@ r14= cPI2_8
7647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r4!,{r6,r7,r10,r11}
7657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
7667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r9		@ r5 = s0 - s1
7677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r5, r9, LSL #1	@ r9 = s0 + s1
7687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r5, r5, ASR #8
7697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r5, r14,r5		@ r5  = (s0-s1)*cPI2_8
7707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,r8		@ r12= s3 - s2
7717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r12,r8, LSL #1	@ r8 = s3 + s2
7727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
7737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r8, r8, ASR #8
7747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r8, r14,r8		@ r8  = (s3+s2)*cPI2_8
7757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r9, ASR #8
7767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r9, r14,r9		@ r9  = (s0+s1)*cPI2_8
7777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r12,r12,ASR #8
7787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r14,r12		@ r12 = (s3-s2)*cPI2_8
7797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1!,{r5,r8,r9,r12}
7807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
7817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ block2
7827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r4,{r5,r6,r9,r10}	@ r5 = x[12]
7837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[13]
7847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[14]
7857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[15]
7867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r7,r8,r11,r12}	@ r7 = x[ 4]
7877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[ 5]
7887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[ 6]
7897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[ 7]
7907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r7, r8		@ r14= s0 = x[ 4] - x[ 5]
7917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r14,r8, LSL #1	@ r8 =      x[ 4] + x[ 5]  -> x[13]
7927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r7, r12,r11		@ r7 = s1 = x[ 7] - x[ 6]
7937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r7, r11, LSL #1	@ r11=      x[ 7] + x[ 6]  -> x[15]
7947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r6		@ r5 = s2 = x[12] - x[13]
7957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r5, r6, LSL #1	@ r6 =      x[12] + x[13]  -> x[12]
7967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r9, r10		@ r12= s3 = x[14] - x[15]
7977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r12,r10,LSL #1	@ r10=      x[14] + x[15]  -> x[14]
7987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r4!,{r6,r8,r10,r11}
7997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1!,{r5,r7,r12,r14}
8007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
8017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly_8
8027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMDB	r1,{r6,r7,r8,r9,r10,r11,r12,r14}
8037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[0]
8047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r7 = x[1]
8057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[2]
8067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[3]
8077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[4]
8087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[5]
8097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[6]
8107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r14= x[7]
8117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r6, r7		@ r6 = s0 = x[0] + x[1]
8127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r7, r6, r7, LSL #1	@ r7 = s1 = x[0] - x[1]
8137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r8, r9		@ r8 = s2 = x[2] + x[3]
8147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r8, r9, LSL #1	@ r9 = s3 = x[2] - x[3]
8157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r10,r11		@ r10= s4 = x[4] + x[5]
8167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r10,r11,LSL #1	@ r11= s5 = x[4] - x[5]
8177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r12,r14		@ r12= s6 = x[6] + x[7]
8187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r12,r14,LSL #1	@ r14= s7 = x[6] - x[7]
8197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
8207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r2, r11,r9		@ r2 = x[0] = s5 + s3
8217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r4, r2, r9, LSL #1	@ r4 = x[2] = s5 - s3
8227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r3, r14,r7		@ r3 = x[1] = s7 - s1
8237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r5, r3, r7, LSL #1	@ r5 = x[3] = s7 + s1
8247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r10,r10,r6		@ r10= x[4] = s4 - s0
8257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r12,r8		@ r11= x[5] = s6 - s2
8267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r10,r6, LSL #1	@ r12= x[6] = s4 + s0
8277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r14,r11,r8, LSL #1	@ r14= x[7] = s6 + s2
8287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMDB	r1,{r2,r3,r4,r5,r10,r11,r12,r14}
8297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
8307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly_8
8317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r6,r7,r8,r9,r10,r11,r12,r14}
8327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[0]
8337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r7 = x[1]
8347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[2]
8357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[3]
8367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[4]
8377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[5]
8387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[6]
8397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r14= x[7]
8407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r6, r7		@ r6 = s0 = x[0] + x[1]
8417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r7, r6, r7, LSL #1	@ r7 = s1 = x[0] - x[1]
8427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r8, r9		@ r8 = s2 = x[2] + x[3]
8437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r8, r9, LSL #1	@ r9 = s3 = x[2] - x[3]
8447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r10,r11		@ r10= s4 = x[4] + x[5]
8457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r10,r11,LSL #1	@ r11= s5 = x[4] - x[5]
8467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r12,r14		@ r12= s6 = x[6] + x[7]
8477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r12,r14,LSL #1	@ r14= s7 = x[6] - x[7]
8487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
8497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r2, r11,r9		@ r2 = x[0] = s5 + s3
8507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r4, r2, r9, LSL #1	@ r4 = x[2] = s5 - s3
8517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r3, r14,r7		@ r3 = x[1] = s7 - s1
8527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r5, r3, r7, LSL #1	@ r5 = x[3] = s7 + s1
8537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r10,r10,r6		@ r10= x[4] = s4 - s0
8547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r12,r8		@ r11= x[5] = s6 - s2
8557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r10,r6, LSL #1	@ r12= x[6] = s4 + s0
8567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r14,r11,r8, LSL #1	@ r14= x[7] = s6 + s2
8577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1,{r2,r3,r4,r5,r10,r11,r12,r14}
8587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
8597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly16 (2nd version)
8607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ block 1
8617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r1,r1,#16*4-8*4
8627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r4,r1,#8*4
8637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r4,{r5,r6,r9,r10}	@ r5 = x[ 8]
8647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[ 9]
8657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[10]
8667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[11]
8677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r7,r8,r11,r12}	@ r7 = x[0]
8687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[1]
8697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[2]
8707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[3]
8717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r6		@ r5 = s0 = x[ 8] - x[ 9]
8727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r5, r6, LSL #1	@ r6 =      x[ 8] + x[ 9]  -> x[ 8]
8737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r9, r10		@ r9 = s1 = x[10] - x[11]
8747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r9, r10,LSL #1	@ r10=      x[10] + x[11]  -> x[10]
8757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r8, r8, r7		@ r8 = s2 = x[ 1] - x[ 0]
8767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r7, r8, r7, LSL #1	@ r7 =      x[ 1] + x[ 0]  -> x[ 9]
8777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,r11		@ r12= s3 = x[ 3] - x[ 2]
8787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r12,r11, LSL #1	@ r11=      x[ 3] + x[ 2]  -> x[11]
8797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r14,#0xb5		@ r14= cPI2_8
8807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r4!,{r6,r7,r10,r11}
8817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
8827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r9		@ r5 = s0 - s1
8837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r5, r9, LSL #1	@ r9 = s0 + s1
8847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r5, r5, ASR #8
8857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r5, r14,r5		@ r5  = (s0-s1)*cPI2_8
8867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r12,r8		@ r12= s3 - s2
8877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r12,r8, LSL #1	@ r8 = s3 + s2
8887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
8897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r8, r8, ASR #8
8907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r8, r14,r8		@ r8  = (s3+s2)*cPI2_8
8917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r9, r9, ASR #8
8927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r9, r14,r9		@ r9  = (s0+s1)*cPI2_8
8937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r12,r12,ASR #8
8947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r14,r12		@ r12 = (s3-s2)*cPI2_8
8957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1!,{r5,r8,r9,r12}
8967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
8977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ block2
8987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r4,{r5,r6,r9,r10}	@ r5 = x[12]
8997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[13]
9007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[14]
9017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[15]
9027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r7,r8,r11,r12}	@ r7 = x[ 4]
9037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[ 5]
9047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[ 6]
9057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[ 7]
9067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, r6		@ r5 = s2 = x[12] - x[13]
9077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r5, r6, LSL #1	@ r6 =      x[12] + x[13]  -> x[12]
9087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r9, r10		@ r9 = s3 = x[14] - x[15]
9097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r9, r10,LSL #1	@ r10=      x[14] + x[15]  -> x[14]
9107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r7, r8		@ r14= s0 = x[ 4] - x[ 5]
9117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r14,r8, LSL #1	@ r8 =      x[ 4] + x[ 5]  -> x[13]
9127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r7, r12,r11		@ r7 = s1 = x[ 7] - x[ 6]
9137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r11,r7, r11, LSL #1	@ r11=      x[ 7] + x[ 6]  -> x[15]
9147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r4!,{r6,r8,r10,r11}
9157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1!,{r5,r7,r9,r14}
9167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
9177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly_8
9187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMDB	r1,{r6,r7,r8,r9,r10,r11,r12,r14}
9197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[0]
9207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r7 = x[1]
9217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[2]
9227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[3]
9237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[4]
9247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[5]
9257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[6]
9267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r14= x[7]
9277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r6, r7		@ r6 = s0 = x[0] + x[1]
9287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r7, r6, r7, LSL #1	@ r7 = s1 = x[0] - x[1]
9297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r8, r9		@ r8 = s2 = x[2] + x[3]
9307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r8, r9, LSL #1	@ r9 = s3 = x[2] - x[3]
9317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r10,r11		@ r10= s4 = x[4] + x[5]
9327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r10,r11,LSL #1	@ r11= s5 = x[4] - x[5]
9337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r12,r14		@ r12= s6 = x[6] + x[7]
9347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r12,r14,LSL #1	@ r14= s7 = x[6] - x[7]
9357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
9367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r2, r11,r9		@ r2 = x[0] = s5 + s3
9377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r4, r2, r9, LSL #1	@ r4 = x[2] = s5 - s3
9387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r3, r14,r7		@ r3 = x[1] = s7 - s1
9397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r5, r3, r7, LSL #1	@ r5 = x[3] = s7 + s1
9407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r10,r10,r6		@ r10= x[4] = s4 - s0
9417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r12,r8		@ r11= x[5] = s6 - s2
9427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r10,r6, LSL #1	@ r12= x[6] = s4 + s0
9437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r14,r11,r8, LSL #1	@ r14= x[7] = s6 + s2
9447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMDB	r1,{r2,r3,r4,r5,r10,r11,r12,r14}
9457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
9467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_butterfly_8
9477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMIA	r1,{r6,r7,r8,r9,r10,r11,r12,r14}
9487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r6 = x[0]
9497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r7 = x[1]
9507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r8 = x[2]
9517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r9 = x[3]
9527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r10= x[4]
9537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r11= x[5]
9547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r12= x[6]
9557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang					@ r14= x[7]
9567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r6, r7		@ r6 = s0 = x[0] + x[1]
9577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r7, r6, r7, LSL #1	@ r7 = s1 = x[0] - x[1]
9587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r8, r9		@ r8 = s2 = x[2] + x[3]
9597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r9, r8, r9, LSL #1	@ r9 = s3 = x[2] - x[3]
9607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r10,r11		@ r10= s4 = x[4] + x[5]
9617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r10,r11,LSL #1	@ r11= s5 = x[4] - x[5]
9627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r12,r14		@ r12= s6 = x[6] + x[7]
9637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r12,r14,LSL #1	@ r14= s7 = x[6] - x[7]
9647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
9657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r2, r11,r9		@ r2 = x[0] = s5 + s3
9667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r4, r2, r9, LSL #1	@ r4 = x[2] = s5 - s3
9677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r3, r14,r7		@ r3 = x[1] = s7 - s1
9687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r5, r3, r7, LSL #1	@ r5 = x[3] = s7 + s1
9697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r10,r10,r6		@ r10= x[4] = s4 - s0
9707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r12,r8		@ r11= x[5] = s6 - s2
9717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r12,r10,r6, LSL #1	@ r12= x[6] = s4 + s0
9727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r14,r11,r8, LSL #1	@ r14= x[7] = s6 + s2
9737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STMIA	r1,{r2,r3,r4,r5,r10,r11,r12,r14}
9747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
9757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r1,r1,#8*4
9767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUBS	r0,r0,#64
9777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BGT	mdct_bufferflies_loop3
9787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
9797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMFD	r13,{r0-r3}
9807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
9817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_bitreverseARM:
	@ Bit-reversal pass, entered by fall-through once r0-r3 have been
	@ reloaded from the stack. w walks down from x + (n>>1) one two-word
	@ pair per iteration; for each pair xx = x + (b>>shift) is formed,
	@ where b is the 12-bit reversal of the counter 'bit', and the pairs
	@ at w and xx are exchanged when w > xx.
	@ Pointer comparisons use the unsigned conditions (HI): addresses are
	@ unsigned, and a signed test (GT) would mis-order two pointers lying
	@ on opposite sides of 0x80000000.
	@ Uses r4-r12 and r14 as scratch; r3 is left holding shift-2.
9827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = points
9837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = in
9847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = step
9857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 = shift
9867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
9877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r4, #0			@ r4 = bit = 0
9887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r5, r1, r0, LSL #1	@ r5 = w = x + (n>>1)   (n*2 bytes)
9897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADR	r6, bitrev		@ r6 = base of the 6-bit reversal table
9907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r3, r3, #2		@ r3 = shift -= 2 (b>>r3 is added as a byte offset)
9917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r5, r5, #8		@ w -= 2 (first pair to examine)
9927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangbrev_lp:
9937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r7, [r6, r4, LSR #6]	@ r7 = bitrev[bit>>6]
9947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	AND	r8, r4, #0x3f		@ r8 = bit & 63
9957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r8, [r6, r8]		@ r8 = bitrev[bit & 63]
9967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r4, r4, #1		@ bit++
9977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ stall XScale
9987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ORR	r7, r7, r8, LSL #6	@ r7 = b = 12-bit reversal of the old bit
9997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r1, r7, LSR r3	@ r9 = xx = x + (b>>shift)
10007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r5, r9			@ if (w > xx)   (unsigned)
10017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r10,[r5],#-8		@   r10 = w[0]		w -= 2 (flags kept)
10027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRHI	r11,[r5,#12]		@   r11 = w[1]  (old w is now r5+8)
10037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRHI	r12,[r9]		@   r12 = xx[0]
10047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRHI	r14,[r9,#4]		@   r14 = xx[1]
10057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STRHI	r10,[r9]		@   xx[0]= w[0]
10067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STRHI	r11,[r9,#4]		@   xx[1]= w[1]
10077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STRHI	r12,[r5,#8]		@   w[0] = xx[0]
10087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STRHI	r14,[r5,#12]		@   w[1] = xx[1]
10097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r5,r1			@ next w still above x ?
10107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BHI	brev_lp			@ unsigned pointer compare
10117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ mdct_step7
	@ Combines mirrored two-word pairs in place: w0 walks up from x and
	@ w1 walks down from x + (n>>1). Each iteration forms sums and
	@ differences of the two pairs, pre-shifts s0/s1 right by 9 and
	@ multiplies them by the byte-sized table entries T[0]/T[1] (LDRB).
	@ Loop 1 steps T upwards by 'step' until it reaches Ttop = T + 1024;
	@ loop 2 steps T back down with the roles of T[0] and T[1] exchanged,
	@ until w0 meets w1.
	@ Loop-exit pointer comparisons use the unsigned condition LO so the
	@ ordering is correct for addresses on either side of 0x80000000.
10137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = points
10147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = in
10157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = step
10167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 = shift-2 (not needed here: r3 and r4 are reused as scratch)
10177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r2, #4			@ r5 = T = (step>=4) ?
10197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRGE	r5, =sincos_lookup0	@          sincos_lookup0 + (step>>1)
10207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRLT	r5, =sincos_lookup1	@        : sincos_lookup1
10217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r7, r1, r0, LSL #1	@ r7 = w1 = x + (n>>1)
10227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADDGE	r5, r5, r2, LSR #1	@ (step>=4 only) T += step>>1
10237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r8, r5, #1024		@ r8 = Ttop
10247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangstep7_loop1:
10257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r6, [r1]		@ r6 = w0[0]
10267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r9, [r1,#4]		@ r9 = w0[1]
10277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r10,[r7,#-8]!		@ r10= w1[0]	w1 -= 2
10287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r11,[r7,#4]		@ r11= w1[1]
10297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r14,[r5,#1]		@ r14= T[1]
10307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r12,[r5],r2		@ r12= T[0]	T += step
10317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r6, r10		@ r6 = s0 = w0[0] + w1[0]
10337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r10,r6, r10,LSL #1	@ r10= s1b= w0[0] - w1[0]
10347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r11,r9		@ r11= s1 = w1[1] - w0[1]
10357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r11,r9, LSL #1	@ r9 = s0b= w1[1] + w0[1]
10367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6, r6, ASR #9		@ s0 >>= 9 before the byte multiply
10387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r3, r6, r14		@ r3   = s0*T[1]
10397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r11,r11,ASR #9		@ s1 >>= 9 before the byte multiply
10407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r4, r11,r12		@ r4   = s1*T[0]
10417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r3, r3, r4		@ r3   = s0*T[1] + s1*T[0] = s2
10427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r14,r11,r14		@ r14  = s1*T[1]
10437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r6, r12		@ r12  = s0*T[0]
10447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r14,r12		@ r14  = s1*T[1] - s0*T[0] = s3
10457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r9 = s0b<<1   (un-halved sums, hence the ASR #1 operands below)
10477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r10= s1b<<1
10487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r3, r9, ASR #1	@ r9 = s0b + s2
10497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r3, r9, r3, LSL #1	@ r3 = s0b - s2
10507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r14,r10,ASR #1	@ r12= s3  - s1b
10527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r14,r10,ASR #1	@ r10= s3  + s1b
10537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r9, [r1],#4		@ w0[0] = s0b + s2
10547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r10,[r1],#4		@ w0[1] = s3 + s1b	w0 += 2
10557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r3, [r7]		@ w1[0] = s0b - s2
10567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r12,[r7,#4]		@ w1[1] = s3 - s1b
10577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r5,r8			@ while (T < Ttop)   (unsigned)
10597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BLO	step7_loop1
10607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangstep7_loop2:
10627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r6, [r1]		@ r6 = w0[0]
10637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r9, [r1,#4]		@ r9 = w0[1]
10647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r10,[r7,#-8]!		@ r10= w1[0]	w1 -= 2
10657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r11,[r7,#4]		@ r11= w1[1]
10667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r14,[r5,-r2]!		@ r14= T[0]	T -= step (pre-decrement)
10677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r12,[r5,#1]		@ r12= T[1]
10687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r6, r6, r10		@ r6 = s0 = w0[0] + w1[0]
10707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r10,r6, r10,LSL #1	@ r10= s1b= w0[0] - w1[0]
10717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r11,r11,r9		@ r11= s1 = w1[1] - w0[1]
10727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r11,r9, LSL #1	@ r9 = s0b= w1[1] + w0[1]
10737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6, r6, ASR #9		@ s0 >>= 9 before the byte multiply
10757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r3, r6, r14		@ r3   = s0*T[0]
10767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r11,r11,ASR #9		@ s1 >>= 9 before the byte multiply
10777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r4, r11,r12		@ r4   = s1*T[1]
10787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r3, r3, r4		@ r3   = s0*T[0] + s1*T[1] = s2
10797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r14,r11,r14		@ r14  = s1*T[0]
10807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r12,r6, r12		@ r12  = s0*T[1]
10817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r14,r14,r12		@ r14  = s1*T[0] - s0*T[1] = s3
10827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r9 = s0b<<1   (un-halved sums, hence the ASR #1 operands below)
10847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r10= s1b<<1
10857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r9, r3, r9, ASR #1	@ r9 = s0b + s2
10867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r3, r9, r3, LSL #1	@ r3 = s0b - s2
10877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	SUB	r12,r14,r10,ASR #1	@ r12= s3  - s1b
10897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r10,r14,r10,ASR #1	@ r10= s3  + s1b
10907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r9, [r1],#4		@ w0[0] = s0b + s2
10917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r10,[r1],#4		@ w0[1] = s3 + s1b	w0 += 2
10927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r3, [r7]		@ w1[0] = s0b - s2
10937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r12,[r7,#4]		@ w1[1] = s3 - s1b
10947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r1,r7			@ while (w0 < w1)   (unsigned)
10967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BLO	step7_loop2
10977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
10987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMFD	r13!,{r0-r3}		@ pop the saved r0-r3 (step 7 advanced r1 and reused r3)
10997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
	@ Step 8: for every two-word pair below iX = x + (n>>1), negate the
	@ second word, pre-shift both right by 8 and replace the pair with
	@   x[0] = s0*T[0] + s1*T[1],   x[1] = s1*T[0] - s0*T[1]
	@ using byte-sized table entries, T advancing by step>>2 per pair.
	@ The whole loop is skipped when step>>2 is 0 or 1.
	@ On exit r0 holds step>>2 and r4-r11 are restored from the stack.
11007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r0 = points
11017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r1 = in
11027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r2 = step
11037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ r3 = shift
11047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r2, r2, ASR #2		@ r2 = step >>= 2
11057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r2, #0			@ step == 0 ...
11067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMPNE	r2, #1			@ ... or step == 1 ?
11077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BEQ	mdct_end		@ then nothing to do here
11087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
11097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ step > 1 (default case)
11107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r2, #4			@ r5 = T = (step>=4) ?
11117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRGE	r5, =sincos_lookup0	@          sincos_lookup0 + (step>>1)
11127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRLT	r5, =sincos_lookup1	@        : sincos_lookup1
11137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADD	r7, r1, r0, LSL #1	@ r7 = iX = x + (n>>1)
11147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	ADDGE	r5, r5, r2, LSR #1	@ (step>=4 only) T += step>>1
11157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_step8_default:
11167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r6, [r1],#4		@ r6 =  s0 = x[0]
11177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDR	r8, [r1],#4		@ r8 = -s1 = x[1]	x += 2
11187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r12,[r5,#1]		@ r12= T[1]
11197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDRB	r14,[r5],r2		@ r14= T[0]	T += step
11207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r8, r8, #0		@ r8 = s1
11217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
11227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ XPROD31(s0, s1, T[0], T[1], x, x+1)
11237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ x[0] = s0 * T[0] + s1 * T[1]      x[1] = s1 * T[0] - s0 * T[1]
11247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r6, r6, ASR #8		@ s0 >>= 8 before the byte multiply
11257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r8, r8, ASR #8		@ s1 >>= 8 before the byte multiply
11267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r10,r8, r12		@ r10  = s1 * T[1]
11277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	CMP	r1, r7			@ x < iX ?  (flags survive to BLO: nothing below sets them)
11287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r10,r6, r14,r10	@ r10 += s0 * T[0]
11297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	RSB	r6, r6, #0		@ r6 = -s0
11307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MUL	r11,r8, r14		@ r11  = s1 * T[0]
11317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MLA	r11,r6, r12,r11	@ r11 -= s0 * T[1]
11327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r10,[r1,#-8]		@ store x[0] of the pair just read
11337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	STR	r11,[r1,#-4]		@ store x[1] of the pair just read
11347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	BLO	mdct_step8_default	@ unsigned pointer compare from the CMP above
11357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
11367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_end:
11377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	MOV	r0, r2			@ return value = step>>2
11387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	LDMFD	r13!,{r4-r11,PC}	@ restore r4-r11 and return (PC popped)
11397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
11407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangbitrev:
	@ 64-entry, one-byte-per-entry bit-reversal table:
	@ bitrev[i] is i with its low 6 bits in reverse order
	@ (e.g. bitrev[1] = 32, bitrev[2] = 16, bitrev[63] = 63).
	@ brev_lp addresses it with ADR/LDRB and combines two lookups,
	@ bitrev[bit>>6] | (bitrev[bit & 63] << 6), into a 12-bit reversal.
	@ entries 0-15
11417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	0
11427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	32
11437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	16
11447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	48
11457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	8
11467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	40
11477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	24
11487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	56
11497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	4
11507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	36
11517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	20
11527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	52
11537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	12
11547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	44
11557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	28
11567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	60
	@ entries 16-31
11577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	2
11587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	34
11597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	18
11607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	50
11617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	10
11627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	42
11637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	26
11647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	58
11657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	6
11667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	38
11677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	22
11687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	54
11697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	14
11707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	46
11717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	30
11727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	62
	@ entries 32-47
11737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	1
11747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	33
11757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	17
11767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	49
11777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	9
11787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	41
11797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	25
11807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	57
11817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	5
11827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	37
11837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	21
11847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	53
11857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	13
11867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	45
11877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	29
11887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	61
	@ entries 48-63
11897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	3
11907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	35
11917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	19
11927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	51
11937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	11
11947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	43
11957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	27
11967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	59
11977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	7
11987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	39
11997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	23
12007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	55
12017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	15
12027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	47
12037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	31
12047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	.byte	63
12057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang
12067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang	@ END
1207