@ Tremolo library
@-----------------------------------------------------------------------
@ Copyright (C) 2002-2009, Xiph.org Foundation
@ Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
@ All rights reserved.

@ Redistribution and use in source and binary forms, with or without
@ modification, are permitted provided that the following conditions
@ are met:

@     * Redistributions of source code must retain the above copyright
@ notice, this list of conditions and the following disclaimer.
@     * Redistributions in binary form must reproduce the above
@ copyright notice, this list of conditions and the following disclaimer
@ in the documentation and/or other materials provided with the
@ distribution.
@     * Neither the names of the Xiph.org Foundation nor Pinknoise
@ Productions Ltd nor the names of its contributors may be used to
@ endorse or promote products derived from this software without
@ specific prior written permission.
@
@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@ A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
@ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@ ----------------------------------------------------------------------

        .text

        @ full accuracy version

        .global mdct_backwardARM
        .global mdct_shift_right
        .global mdct_unroll_prelap
        .global mdct_unroll_part2
        .global mdct_unroll_part3
        .global mdct_unroll_postlap

        @ The sincos tables are defined outside this file; they are marked
        @ hidden so references to them do not go through dynamic symbol
        @ resolution.
        .extern sincos_lookup0
        .extern sincos_lookup1
        .hidden sincos_lookup0
        .hidden sincos_lookup1

@ ----------------------------------------------------------------------
@ mdct_unroll_prelap
@
@ Walks backwards through 32-bit words from r down to post.  Each word is
@ shifted right by 9, saturated to a signed 16-bit value and stored as a
@ halfword at out; out then advances by step halfwords.
@
@ In:    r0 = out
@        r1 = post
@        r2 = r
@        r3 = step
@ Saves: r4-r7, r14 (returns by popping the saved r14 into PC)
@ Uses:  r12 and r14 as scratch; flags
@
@ Saturation idiom used throughout this file (x = value, r4 = ~0x8000):
@        MOV   r14,x,ASR #15      r14 is 0 or -1 iff x fits in 16 bits
@        TEQ   r14,r14,ASR #31    Z set iff r14 is 0 or -1
@        EORNE x,r4,r14,ASR #31   low halfword becomes 0x7FFF (x positive)
@                                 or 0x8000 (x negative)
@ ----------------------------------------------------------------------
mdct_unroll_prelap:
        @ r0 = out
        @ r1 = post
        @ r2 = r
        @ r3 = step
        STMFD   r13!,{r4-r7,r14}
        MVN     r4, #0x8000             @ r4 = 0xFFFF7FFF (saturation mask)
        MOV     r3, r3, LSL #1          @ r3 = step in bytes (halfword units)
        SUB     r1, r2, r1              @ r1 = r - post
        SUBS    r1, r1, #16             @ r1 = r - post - 16
        BLT     unroll_over             @ fewer than 4 words left
unroll_loop:
        @ Four words per iteration: r12 holds the word nearest r, r5 the
        @ furthest, so the stores below go out in descending address order.
        LDMDB   r2!,{r5,r6,r7,r12}

        MOV     r5, r5, ASR #9          @ r5 = (*--r)>>9
        MOV     r6, r6, ASR #9          @ r6 = (*--r)>>9
        MOV     r7, r7, ASR #9          @ r7 = (*--r)>>9
        MOV     r12,r12,ASR #9          @ r12= (*--r)>>9

        MOV     r14,r12,ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r12,r4, r14,ASR #31
        STRH    r12,[r0], r3

        MOV     r14,r7, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r7, r4, r14,ASR #31
        STRH    r7, [r0], r3

        MOV     r14,r6, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r6, r4, r14,ASR #31
        STRH    r6, [r0], r3

        MOV     r14,r5, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r5, r4, r14,ASR #31
        STRH    r5, [r0], r3

        SUBS    r1, r1, #16
        BGE     unroll_loop

unroll_over:
        ADDS    r1, r1, #16             @ r1 = bytes still to process
        BLE     unroll_end
unroll_loop2:
        @ Tail: one word per iteration.
        LDR     r5,[r2,#-4]!
        @ stall
        @ stall (Xscale)
        MOV     r5, r5, ASR #9          @ r5 = (*--r)>>9
        MOV     r14,r5, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r5, r4, r14,ASR #31
        STRH    r5, [r0], r3
        SUBS    r1, r1, #4
        BGT     unroll_loop2
unroll_end:
        LDMFD   r13!,{r4-r7,PC}

@ ----------------------------------------------------------------------
@ mdct_unroll_postlap
@
@ Walks forwards from l, reading every second 32-bit word (l advances by
@ 8 bytes per sample).  Each word is negated, shifted right by 9,
@ saturated to signed 16 bits and stored as a halfword at out; out then
@ advances by step halfwords.
@
@ In:    r0 = out
@        r1 = post
@        r2 = l
@        r3 = step
@ Saves: r4-r7, r14 (returns by popping the saved r14 into PC)
@ Uses:  r12 and r14 as scratch; flags
@ ----------------------------------------------------------------------
mdct_unroll_postlap:
        @ r0 = out
        @ r1 = post
        @ r2 = l
        @ r3 = step
        STMFD   r13!,{r4-r7,r14}
        MVN     r4, #0x8000             @ r4 = 0xFFFF7FFF (saturation mask)
        MOV     r3, r3, LSL #1          @ r3 = step in bytes (halfword units)
        SUB     r1, r1, r2              @ r1 = post - l
        MOV     r1, r1, ASR #1          @ r1 = (post - l)>>1 (4 per sample)
        SUBS    r1, r1, #16             @ r1 = ((post - l)>>1) - 16
        BLT     unroll_over3            @ fewer than 4 samples left
unroll_loop3:
        LDR     r12,[r2],#8
        LDR     r7, [r2],#8
        LDR     r6, [r2],#8
        LDR     r5, [r2],#8

        RSB     r12,r12,#0
        RSB     r5, r5, #0
        RSB     r6, r6, #0
        RSB     r7, r7, #0

        MOV     r12, r12,ASR #9         @ r12= (-*l)>>9
        MOV     r5, r5, ASR #9          @ r5 = (-*l)>>9
        MOV     r6, r6, ASR #9          @ r6 = (-*l)>>9
        MOV     r7, r7, ASR #9          @ r7 = (-*l)>>9

        MOV     r14,r12,ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r12,r4, r14,ASR #31
        STRH    r12,[r0], r3

        MOV     r14,r7, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r7, r4, r14,ASR #31
        STRH    r7, [r0], r3

        MOV     r14,r6, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r6, r4, r14,ASR #31
        STRH    r6, [r0], r3

        MOV     r14,r5, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r5, r4, r14,ASR #31
        STRH    r5, [r0], r3

        SUBS    r1, r1, #16
        BGE     unroll_loop3

unroll_over3:
        ADDS    r1, r1, #16             @ r1 = remaining count (4 per sample)
        BLE     unroll_over4
unroll_loop4:
        @ Tail: one sample per iteration.
        LDR     r5,[r2], #8
        @ stall
        @ stall (Xscale)
        RSB     r5, r5, #0
        MOV     r5, r5, ASR #9          @ r5 = (-*l)>>9
        MOV     r14,r5, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r5, r4, r14,ASR #31
        STRH    r5, [r0], r3
        SUBS    r1, r1, #4
        BGT     unroll_loop4
unroll_over4:
        LDMFD   r13!,{r4-r7,PC}

@ ----------------------------------------------------------------------
@ mdct_unroll_part2
@
@ For each word between post and r (walking r downwards) computes
@        (hi32(*l * *wL) + hi32(*r * *wR)) >> 8
@ where hi32 is the top word of the signed 64-bit product, saturates the
@ result to signed 16 bits and stores it as a halfword at out.  l steps
@ back by two words, wL forwards by one, r and wR back by one, and out
@ forwards by step halfwords per sample.
@
@ In:    r0       = out
@        r1       = post
@        r2       = l
@        r3       = r
@        [sp,#0]  = step
@        [sp,#4]  = wL
@        [sp,#8]  = wR
@ Saves: r4, r6-r11, r14 (returns by popping the saved r14 into PC)
@ Uses:  r12 and r14 as scratch; flags
@ ----------------------------------------------------------------------
mdct_unroll_part2:
        @ r0 = out
        @ r1 = post
        @ r2 = l
        @ r3 = r
        @ [sp,#0] = step
        @ [sp,#4] = wL
        @ [sp,#8] = wR
        MOV     r12,r13                 @ keep entry sp to reach stack args
        STMFD   r13!,{r4,r6-r11,r14}
        LDMFD   r12,{r8,r9,r10}         @ r8 = step
                                        @ r9 = wL
                                        @ r10= wR
        MVN     r4, #0x8000             @ r4 = 0xFFFF7FFF (saturation mask)
        MOV     r8, r8, LSL #1          @ r8 = step in bytes (halfword units)
        SUBS    r1, r3, r1              @ r1 = (r - post)
        BLE     unroll_over5
unroll_loop5:
        LDR     r12,[r2, #-8]!          @ r12= *l       (but l -= 2 first)
        LDR     r11,[r9],#4             @ r11= *wL++
        LDR     r7, [r3, #-4]!          @ r7 = *--r
        LDR     r6, [r10,#-4]!          @ r6 = *--wR

        @ Can save a cycle here, at the cost of 1bit errors in rounding
        SMULL   r14,r11,r12,r11         @ (r14,r11)  = *l   * *wL++
        SMULL   r14,r6, r7, r6          @ (r14,r6)   = *--r * *--wR
        ADD     r6, r6, r11             @ sum of the two high words
        MOV     r6, r6, ASR #8
        MOV     r14,r6, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r6, r4, r14,ASR #31
        STRH    r6, [r0], r8

        SUBS    r1, r1, #4
        BGT     unroll_loop5

unroll_over5:
        LDMFD   r13!,{r4,r6-r11,PC}

@ ----------------------------------------------------------------------
@ mdct_unroll_part3
@
@ For each word between r and post (walking r upwards) computes
@        (hi32(*r * *wR) - hi32(*l * *wL)) >> 8
@ where hi32 is the top word of the signed 64-bit product, saturates the
@ result to signed 16 bits and stores it as a halfword at out.  l steps
@ forwards by two words, wL and r forwards by one, wR back by one, and
@ out forwards by step halfwords per sample.
@
@ In:    r0       = out
@        r1       = post
@        r2       = l
@        r3       = r
@        [sp,#0]  = step
@        [sp,#4]  = wL
@        [sp,#8]  = wR
@ Saves: r4, r6-r11, r14 (returns by popping the saved r14 into PC)
@ Uses:  r12 and r14 as scratch; flags
@ ----------------------------------------------------------------------
mdct_unroll_part3:
        @ r0 = out
        @ r1 = post
        @ r2 = l
        @ r3 = r
        @ [sp,#0] = step
        @ [sp,#4] = wL
        @ [sp,#8] = wR
        MOV     r12,r13                 @ keep entry sp to reach stack args
        STMFD   r13!,{r4,r6-r11,r14}
        LDMFD   r12,{r8,r9,r10}         @ r8 = step
                                        @ r9 = wL
                                        @ r10= wR
        MVN     r4, #0x8000             @ r4 = 0xFFFF7FFF (saturation mask)
        MOV     r8, r8, LSL #1          @ r8 = step in bytes (halfword units)
        SUBS    r1, r1, r3              @ r1 = (post - r)
        BLE     unroll_over6
unroll_loop6:
        LDR     r12,[r2],#8             @ r12= *l       (then l += 2)
        LDR     r11,[r9],#4             @ r11= *wL++
        LDR     r7, [r3],#4             @ r7 = *r++
        LDR     r6, [r10,#-4]!          @ r6 = *--wR

        @ Can save a cycle here, at the cost of 1bit errors in rounding
        SMULL   r14,r11,r12,r11         @ (r14,r11)  = *l   * *wL++
        SMULL   r14,r6, r7, r6          @ (r14,r6)   = *r++ * *--wR
        SUB     r6, r6, r11             @ difference of the two high words
        MOV     r6, r6, ASR #8
        MOV     r14,r6, ASR #15
        TEQ     r14,r14,ASR #31         @ if r14==0 || r14==-1 then in range
        EORNE   r6, r4, r14,ASR #31     @ saturate: 0x7FFF or 0x8000
        STRH    r6, [r0], r8

        SUBS    r1, r1, #4
        BGT     unroll_loop6

unroll_over6:
        LDMFD   r13!,{r4,r6-r11,PC}

@ ----------------------------------------------------------------------
@ mdct_shift_right
@
@ Copies n>>2 words to right, taking every second word of in starting at
@ in[1] (source advances 8 bytes per word copied, destination 4 bytes).
@ Runs eight words per iteration, then finishes the remainder one word
@ at a time.
@
@ In:    r0 = n
@        r1 = in
@        r2 = right
@ Saves: r4-r11, r14 (returns by popping the saved r14 into PC)
@ Uses:  r3, r12 and r14 as scratch; flags
@ ----------------------------------------------------------------------
mdct_shift_right:
        @ r0 = n
        @ r1 = in
        @ r2 = right
        STMFD   r13!,{r4-r11,r14}

        MOV     r0, r0, LSR #2          @ n >>= 2
        ADD     r1, r1, #4              @ start at in[1]

        SUBS    r0, r0, #8
        BLT     sr_less_than_8
sr_loop:
        LDR     r3, [r1], #8
        LDR     r4, [r1], #8
        LDR     r5, [r1], #8
        LDR     r6, [r1], #8
        LDR     r7, [r1], #8
        LDR     r8, [r1], #8
        LDR     r12,[r1], #8
        LDR     r14,[r1], #8
        SUBS    r0, r0, #8
        STMIA   r2!,{r3,r4,r5,r6,r7,r8,r12,r14}
        BGE     sr_loop
sr_less_than_8:
        ADDS    r0, r0, #8              @ r0 = words left (0..7)
        BEQ     sr_end
sr_loop2:
        LDR     r3, [r1], #8
        SUBS    r0, r0, #1
        STR     r3, [r2], #4
        BGT     sr_loop2
sr_end:
        LDMFD   r13!,{r4-r11,PC}

mdct_backwardARM:
        @ r0 = n
        @ r1 = in
        STMFD   r13!,{r4-r11,r14}
2957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 2967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r2,#1<<4 @ r2 = 1<<shift 2977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r3,#13-4 @ r3 = 13-shift 2987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangfind_shift_loop: 2997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang TST r0,r2 @ if (n & (1<<shift)) == 0 3007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r2,r2,LSL #1 3017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUBEQ r3,r3,#1 @ shift-- 3027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BEQ find_shift_loop 3037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r2,#2 3047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r2,r2,LSL r3 @ r2 = step = 2<<shift 3057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ presymmetry 3077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r0 = n (a multiple of 4) 3087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r1 = in 3097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r2 = step 3107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 = shift 3117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r4, r1, r0, LSL #1 @ r4 = aX = in+(n>>1) 3137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r14,r1, r0 @ r14= in+(n>>2) 3147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r4, r4, #3*4 @ r4 = aX = in+n2-3 315e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel ADRL r7, .Lsincos_lookup 316e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel LDR r5, [r7] @ r5 = T=sincos_lookup0 317e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel ADD r5, r7 3187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangpresymmetry_loop1: 3207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r7, [r4,#8] @ r6 = s2 = aX[2] 
3217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r11,[r5,#4] @ r11= T[1] 3227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r6, [r4] @ r6 = s0 = aX[0] 3237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r10,[r5],r2,LSL #2 @ r10= T[0] T += step 3247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XPROD31(s0, s2, T[0], T[1], 0xaX[0], &ax[2]) 3267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r8, r9, r7, r11 @ (r8, r9) = s2*T[1] 3277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall 3287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 3297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r8, r9, r6, r10 @ (r8, r9) += s0*T[0] 3307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r6, r6, #0 3317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 3327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r8, r12,r7, r10 @ (r8, r12) = s2*T[0] 3337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r9, r9, LSL #1 3347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 
3357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r8, r12,r6, r11 @ (r8, r12) -= s0*T[1] 3367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r9, [r4],#-16 @ aX[0] = r9 3377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang CMP r4,r14 3387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 3397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r12,[r4,#8+16] @ aX[2] = r12 3407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BGE presymmetry_loop1 @ while (aX >= in+n4) 3427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangpresymmetry_loop2: 3447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r6,[r4] @ r6 = s0 = aX[0] 3457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r10,[r5,#4] @ r10= T[1] 3467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r7,[r4,#8] @ r6 = s2 = aX[2] 3477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r11,[r5],-r2,LSL #2 @ r11= T[0] T -= step 3487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XPROD31(s0, s2, T[1], T[0], 0xaX[0], &ax[2]) 3507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r8, r9, r6, r10 @ (r8, r9) = s0*T[1] 3517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall 3527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 3537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r8, r9, r7, r11 @ (r8, r9) += s2*T[0] 3547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r6, r6, #0 3557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 3567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r8, r12,r7, r10 @ (r8, r12) = s2*T[1] 3577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r9, r9, LSL #1 3587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 
3597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r8, r12,r6, r11 @ (r8, r12) -= s0*T[0] 3607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r9, [r4],#-16 @ aX[0] = r9 3617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang CMP r4,r1 3627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 3637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r12,[r4,#8+16] @ aX[2] = r12 3647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BGE presymmetry_loop2 @ while (aX >= in) 3667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 3677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r0 = n 3687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r1 = in 3697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r2 = step 3707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 = shift 3717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMFD r13!,{r3} 372e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel ADRL r4, .Lsincos_lookup 373e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel LDR r5, [r4] @ r5 = T=sincos_lookup0 374e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel ADD r5, r4 3757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r4, r1, r0, LSL #1 @ r4 = aX = in+(n>>1) 3767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r4, r4, #4*4 @ r4 = aX = in+(n>>1)-4 3777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r11,[r5,#4] @ r11= T[1] 3787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r10,[r5],r2, LSL #2 @ r10= T[0] T += step 3797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangpresymmetry_loop3: 3807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r8,[r1],#16 @ r8 = ro0 = bX[0] 3817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r9,[r1,#8-16] @ r9 = ro2 = bX[2] 3827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r6,[r4] @ r6 = ri0 = aX[0] 3837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 
3847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XNPROD31( ro2, ro0, T[1], T[0], 0xaX[0], &aX[2] ) 3857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ aX[0] = (ro2*T[1] - ro0*T[0])>>31 aX[2] = (ro0*T[1] + ro2*T[0])>>31 3867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r12,r8, r11 @ (r14,r12) = ro0*T[1] 3877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r8,r8,#0 @ r8 = -ro0 3887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ Stall ? 3897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r12,r9, r10 @ (r14,r12) += ro2*T[0] 3907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r7,[r4,#8] @ r7 = ri2 = aX[2] 3917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ Stall ? 3927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r3, r9, r11 @ (r14,r3) = ro2*T[1] 3937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 3947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r11,[r5,#4] @ r11= T[1] 3957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r3, r8, r10 @ (r14,r3) -= ro0*T[0] 3967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r10,[r5],r2, LSL #2 @ r10= T[0] T += step 3977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r12,[r4,#8] 3987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r3, r3, LSL #1 3997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r3, [r4],#-16 4007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XNPROD31( ri2, ri0, T[0], T[1], 0xbX[0], &bX[2] ) 4027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ bX[0] = (ri2*T[0] - ri0*T[1])>>31 bX[2] = (ri0*T[0] + ri2*T[1])>>31 4037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r12,r6, r10 @ (r14,r12) = ri0*T[0] 4047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r6,r6,#0 @ r6 = -ri0 4057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 
4067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r12,r7, r11 @ (r14,r12) += ri2*T[1] 4077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 4087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 4097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r3, r7, r10 @ (r14,r3) = ri2*T[0] 4107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 4117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall ? 4127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r3, r6, r11 @ (r14,r3) -= ri0*T[1] 4137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang CMP r4,r1 4147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r12,[r1,#8-16] 4157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r3, r3, LSL #1 4167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r3, [r1,#-16] 4177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BGE presymmetry_loop3 4197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r1,r1,r0 @ r1 = in -= n>>2 (i.e. restore in) 4217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r3,[r13] 4237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r2,[r13,#-4]! 
4247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterflies 4267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r0 = n = (points * 2) 4277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r1 = in = x 4287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r2 = i 4297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 = shift 4307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMFD r13!,{r0-r1} 431e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel ADRL r4, .Lsincos_lookup 432e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel LDR r5, [r4] 433e96d449a28f9679ca1ac22e21bd1cf1d68d2cb4fArd Biesheuvel ADD r5, r4 4347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSBS r4,r3,#6 @ r4 = stages = 7-shift then --stages 4357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BLE no_generics 4367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r14,#4 @ r14= 4 (i=0) 4377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r6, r14,LSL r3 @ r6 = (4<<i)<<shift 4387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_butterflies_loop1: 4397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r0, r0, LSR #1 @ r0 = points>>i = POINTS 4407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r2, r14,LSR #2 @ r2 = (1<<i)-j (j=0) 4417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMFD r13!,{r4,r14} 4427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_butterflies_loop2: 4437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterfly_generic(x+POINTS*j, POINTS, 4<<(i+shift)) 4457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterfly_generic(r1, r0, r6) 4467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r0 = points 4477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r1 = x 4487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ preserve r2 (external loop counter) 
4497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ preserve r3 4507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ preserve r4 (external loop counter) 4517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r5 = T = sincos_lookup0 4527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = step 4537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ preserve r14 4547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STR r2,[r13,#-4]! @ stack r2 4567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r1,r1,r0,LSL #1 @ r1 = x2+4 = x + (POINTS>>1) 4577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r7,r1,r0,LSL #1 @ r7 = x1+4 = x + POINTS 4587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r5,#1024*4 @ r12= sincos_lookup0+1024 4597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_bufferfly_generic_loop1: 4617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMDB r7!,{r2,r3,r8,r11} @ r2 = x1[0] 4627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 = x1[1] 4637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x1[2] 4647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x1[3] x1 -= 4 4657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMDB r1!,{r4,r9,r10,r14} @ r4 = x2[0] 4667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x2[1] 4677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x2[2] 4687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= x2[3] x2 -= 4 4697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r2, r2, r3 @ r2 = s0 = x1[0] - x1[1] 4717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r3, r2, r3, LSL #1 @ r3 = x1[0] + x1[1] (-> x1[0]) 4727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r11,r8 @ r11= s1 = x1[3] - x1[2] 4737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, 
r11,r8, LSL #1 @ r8 = x1[3] + x1[2] (-> x1[2]) 4747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r9, r4 @ r9 = s2 = x2[1] - x2[0] 4757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r4, r9, r4, LSL #1 @ r4 = x2[1] + x2[0] (-> x1[1]) 4767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14,r14,r10 @ r14= s3 = x2[3] - x2[2] 4777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r14,r10,LSL #1 @ r10= x2[3] + x2[2] (-> x1[3]) 4787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r7,{r3,r4,r8,r10} 4797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r0 = points 4817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r1 = x2 4827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r2 = s0 4837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 free 4847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r4 free 4857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r5 = T 4867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = step 4877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r7 = x1 4887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 free 4897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = s2 4907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10 free 4917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= s1 4927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= limit 4937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= s3 4947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r8, [r5,#4] @ r8 = T[1] 4967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r10,[r5],r6,LSL #2 @ r10= T[0] T += step 4977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 4987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XPROD31(s1, s0, T[0], T[1], &x2[0], &x2[2]) 4997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ x2[0] = (s1*T[0] + s0*T[1])>>31 
x2[2] = (s0*T[0] - s1*T[1])>>31 5007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall Xscale 5017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r4, r3, r2, r8 @ (r4, r3) = s0*T[1] 5027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r4, r3, r11,r10 @ (r4, r3) += s1*T[0] 5037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r11,r11,#0 5047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r11,r4, r8, r11 @ (r11,r4) = -s1*T[1] 5057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r11,r4, r2, r10 @ (r11,r4) += s0*T[0] 5067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r2, r3, LSL #1 @ r2 = r3<<1 = Value for x2[0] 5077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XPROD31(s2, s3, T[0], T[1], &x2[1], &x2[3]) 5097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ x2[1] = (s2*T[0] + s3*T[1])>>31 x2[3] = (s3*T[0] - s2*T[1])>>31 5107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r11,r3, r9, r10 @ (r11,r3) = s2*T[0] 5117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r4, r4, LSL #1 @ r4 = r4<<1 = Value for x2[2] 5127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r11,r3, r14,r8 @ (r11,r3) += s3*T[1] 5137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r9, r9, #0 5147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r10,r11,r14,r10 @ (r10,r11) = s3*T[0] 5157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r3, r3, LSL #1 @ r3 = r3<<1 = Value for x2[1] 5167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r10,r11,r9,r8 @ (r10,r11) -= s2*T[1] 5177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang CMP r5, r12 5187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r11,r11,LSL #1 @ r11= r11<<1 = Value for x2[3] 5197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1,{r2,r3,r4,r11} 5217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 
5227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BLT mdct_bufferfly_generic_loop1 5237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,#1024*4 5257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_bufferfly_generic_loop2: 5267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMDB r7!,{r2,r3,r9,r10} @ r2 = x1[0] 5277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 = x1[1] 5287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x1[2] 5297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x1[3] x1 -= 4 5307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMDB r1!,{r4,r8,r11,r14} @ r4 = x2[0] 5317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x2[1] 5327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x2[2] 5337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= x2[3] x2 -= 4 5347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r2, r2, r3 @ r2 = s0 = x1[0] - x1[1] 5367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r3, r2, r3, LSL #1 @ r3 = x1[0] + x1[1] (-> x1[0]) 5377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r9,r10 @ r9 = s1 = x1[2] - x1[3] 5387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r9,r10, LSL #1 @ r10= x1[2] + x1[3] (-> x1[2]) 5397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r4, r4, r8 @ r4 = s2 = x2[0] - x2[1] 5407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r4, r8, LSL #1 @ r8 = x2[0] + x2[1] (-> x1[1]) 5417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14,r14,r11 @ r14= s3 = x2[3] - x2[2] 5427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r11,r14,r11,LSL #1 @ r11= x2[3] + x2[2] (-> x1[3]) 5437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r7,{r3,r8,r10,r11} 5447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r0 = points 
5467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r1 = x2 5477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r2 = s0 5487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 free 5497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r4 = s2 5507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r5 = T 5517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = step 5527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r7 = x1 5537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 free 5547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = s1 5557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10 free 5567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11 free 5577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= limit 5587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= s3 5597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r8, [r5,#4] @ r8 = T[1] 5617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r10,[r5],-r6,LSL #2 @ r10= T[0] T -= step 5627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XNPROD31(s0, s1, T[0], T[1], &x2[0], &x2[2]) 5647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ x2[0] = (s0*T[0] - s1*T[1])>>31 x2[2] = (s1*T[0] + s0*T[1])>>31 5657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall Xscale 5667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r3, r11,r2, r8 @ (r3, r11) = s0*T[1] 5677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r3, r11,r9, r10 @ (r3, r11) += s1*T[0] 5687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r9, r9, #0 5697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r3, r2, r10,r2 @ (r3, r2) = s0*T[0] 5707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r3, r2, r9, r8 @ (r3, r2) += -s1*T[1] 5717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r9, r11,LSL #1 @ r9 = r11<<1 = Value 
for x2[2] 5727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XNPROD31(s3, s2, T[0], T[1], &x2[1], &x2[3]) 5747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ x2[1] = (s3*T[0] - s2*T[1])>>31 x2[3] = (s2*T[0] + s3*T[1])>>31 5757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r3, r11,r4, r10 @ (r3,r11) = s2*T[0] 5767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r2, r2, LSL #1 @ r2 = r2<<1 = Value for x2[0] 5777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r3, r11,r14,r8 @ (r3,r11) += s3*T[1] 5787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r4, r4, #0 5797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r10,r3,r14,r10 @ (r10,r3) = s3*T[0] 5807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r11,r11,LSL #1 @ r11= r11<<1 = Value for x2[3] 5817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r10,r3, r4, r8 @ (r10,r3) -= s2*T[1] 5827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang CMP r5, r12 5837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r3, r3, LSL #1 @ r3 = r3<<1 = Value for x2[1] 5847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1,{r2,r3,r9,r11} 5867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BGT mdct_bufferfly_generic_loop2 5887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r2,[r13],#4 @ unstack r2 5907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r1, r1, r0, LSL #2 @ r1 = x+POINTS*j 5917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall Xscale 5927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUBS r2, r2, #1 @ r2-- (j++) 5937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BGT mdct_butterflies_loop2 5947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMFD 
r13!,{r4,r14} 5967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r1,[r13,#4] 5987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 5997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUBS r4, r4, #1 @ stages-- 6007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r14,r14,LSL #1 @ r14= 4<<i (i++) 6017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r6, r6, LSL #1 @ r6 = step <<= 1 (i++) 6027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BGE mdct_butterflies_loop1 6037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMFD r13,{r0-r1} 6047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangno_generics: 6057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterflies part2 (loop around mdct_bufferfly_32) 6067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r0 = points 6077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r1 = in 6087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r2 = step 6097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 = shift 6107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_bufferflies_loop3: 6127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_bufferfly_32 6137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ block1 6157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r4, r1, #16*4 @ r4 = &in[16] 6167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r4,{r5,r6,r9,r10} @ r5 = x[16] 6177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[17] 6187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[18] 6197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[19] 6207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r7,r8,r11,r12} @ r7 = x[0] 6217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[1] 6227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ 
r11= x[2] 6237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[3] 6247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r6 @ r5 = s0 = x[16] - x[17] 6257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r5, r6, LSL #1 @ r6 = x[16] + x[17] -> x[16] 6267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r9, r10 @ r9 = s1 = x[18] - x[19] 6277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r9, r10,LSL #1 @ r10= x[18] + x[19] -> x[18] 6287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r8, r8, r7 @ r8 = s2 = x[ 1] - x[ 0] 6297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r7, r8, r7, LSL #1 @ r7 = x[ 1] + x[ 0] -> x[17] 6307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,r11 @ r12= s3 = x[ 3] - x[ 2] 6317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r11,r12,r11, LSL #1 @ r11= x[ 3] + x[ 2] -> x[19] 6327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r4!,{r6,r7,r10,r11} 6337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r6,cPI1_8 6357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r7,cPI3_8 6367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XNPROD31( s0, s1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] ) 6387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ x[0] = s0*cPI3_8 - s1*cPI1_8 x[2] = s1*cPI3_8 + s0*cPI1_8 6397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall Xscale 6407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r11,r5, r6 @ (r14,r11) = s0*cPI1_8 6417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r11,r9, r7 @ (r14,r11) += s1*cPI3_8 6427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r9, r9, #0 6437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r5, r7, r5 @ (r14,r5) = s0*cPI3_8 6447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r5, r9, r6 @ (r14,r5) -= s1*cPI1_8 
6457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r11,r11,LSL #1 6467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r5, r5, LSL #1 6477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XPROD31 ( s2, s3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] ) 6497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ x[1] = s2*cPI1_8 + s3*cPI3_8 x[3] = s3*cPI1_8 - s2*cPI3_8 6507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r9, r8, r6 @ (r14,r9) = s2*cPI1_8 6517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r9, r12,r7 @ (r14,r9) += s3*cPI3_8 6527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r8,r8,#0 6537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r12,r6, r12 @ (r14,r12) = s3*cPI1_8 6547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r12,r8, r7 @ (r14,r12) -= s2*cPI3_8 6557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r9, r9, LSL #1 6567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 6577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1!,{r5,r9,r11,r12} 6587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ block2 6607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r4,{r5,r6,r9,r10} @ r5 = x[20] 6617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[21] 6627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[22] 6637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[23] 6647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r7,r8,r11,r12} @ r7 = x[4] 6657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[5] 6667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[6] 6677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[7] 6687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r6 @ r5 = s0 = x[20] - x[21] 6697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r5, r6, 
LSL #1 @ r6 = x[20] + x[21] -> x[20] 6707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r9, r10 @ r9 = s1 = x[22] - x[23] 6717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r9, r10,LSL #1 @ r10= x[22] + x[23] -> x[22] 6727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r8, r8, r7 @ r8 = s2 = x[ 5] - x[ 4] 6737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r7, r8, r7, LSL #1 @ r7 = x[ 5] + x[ 4] -> x[21] 6747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,r11 @ r12= s3 = x[ 7] - x[ 6] 6757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r11,r12,r11, LSL #1 @ r11= x[ 7] + x[ 6] -> x[23] 6767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r14,cPI2_8 6777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r4!,{r6,r7,r10,r11} 6787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r9 @ r5 = s0 - s1 6807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r9, r5, r9, LSL #1 @ r9 = s0 + s1 6817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r5, r14,r5 @ (r6,r5) = (s0-s1)*cPI2_8 6827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,r8 @ r12= s3 - s2 6837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r12,r8, LSL #1 @ r8 = s3 + s2 6847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r8, r14,r8 @ (r6,r8) = (s3+s2)*cPI2_8 6867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r5, r5, LSL #1 6877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r9, r14,r9 @ (r6,r9) = (s0+s1)*cPI2_8 6887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r8, r8, LSL #1 6897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r12,r14,r12 @ (r6,r12) = (s3-s2)*cPI2_8 6907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r9, r9, LSL #1 6917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 
6927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1!,{r5,r8,r9,r12} 6937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 6947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ block3 6957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r4,{r5,r6,r9,r10} @ r5 = x[24] 6967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[25] 6977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[26] 6987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[27] 6997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r7,r8,r11,r12} @ r7 = x[8] 7007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[9] 7017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[10] 7027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[11] 7037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r6 @ r5 = s0 = x[24] - x[25] 7047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r5, r6, LSL #1 @ r6 = x[24] + x[25] -> x[24] 7057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r9, r10 @ r9 = s1 = x[26] - x[27] 7067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r9, r10,LSL #1 @ r10= x[26] + x[27] -> x[26] 7077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r8, r8, r7 @ r8 = s2 = x[ 9] - x[ 8] 7087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r7, r8, r7, LSL #1 @ r7 = x[ 9] + x[ 8] -> x[25] 7097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,r11 @ r12= s3 = x[11] - x[10] 7107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r11,r12,r11, LSL #1 @ r11= x[11] + x[10] -> x[27] 7117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r4!,{r6,r7,r10,r11} 7127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 7137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r6,cPI3_8 7147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r7,cPI1_8 7157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 7167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @
XNPROD31( s0, s1, cPI1_8, cPI3_8, &x[ 8], &x[10] ) 7177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ x[8] = s0*cPI1_8 - s1*cPI3_8 x[10] = s1*cPI1_8 + s0*cPI3_8 7187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ stall Xscale 7197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r11,r5, r6 @ (r14,r11) = s0*cPI3_8 7207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r11,r9, r7 @ (r14,r11) += s1*cPI1_8 7217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r9, r9, #0 7227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r5, r7, r5 @ (r14,r5) = s0*cPI1_8 7237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r5, r9, r6 @ (r14,r5) -= s1*cPI3_8 7247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r11,r11,LSL #1 7257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r5, r5, LSL #1 7267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 7277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ XPROD31 ( s2, s3, cPI3_8, cPI1_8, &x[ 9], &x[11] ) 7287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ x[9] = s2*cPI3_8 + s3*cPI1_8 x[11] = s3*cPI3_8 - s2*cPI1_8 7297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r9, r8, r6 @ (r14,r9) = s2*cPI3_8 7307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r9, r12,r7 @ (r14,r9) += s3*cPI1_8 7317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang RSB r8,r8,#0 7327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r14,r12,r6, r12 @ (r14,r12) = s3*cPI3_8 7337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMLAL r14,r12,r8, r7 @ (r14,r12) -= s2*cPI1_8 7347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r9, r9, LSL #1 7357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 7367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1!,{r5,r9,r11,r12} 7377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 7387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ block4 
7397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r4,{r5,r6,r10,r11} @ r5 = x[28] 7407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[29] 7417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[30] 7427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[31] 7437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r8,r9,r12,r14} @ r8 = x[12] 7447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[13] 7457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[14] 7467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= x[15] 7477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r6 @ r5 = s0 = x[28] - x[29] 7487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r5, r6, LSL #1 @ r6 = x[28] + x[29] -> x[28] 7497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r7, r14,r12 @ r7 = s3 = x[15] - x[14] 7507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r7, r12, LSL #1 @ r12= x[15] + x[14] -> x[31] 7517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r10,r10,r11 @ r10= s1 = x[30] - x[31] 7527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r11,r10,r11,LSL #1 @ r11= x[30] + x[31] -> x[30] 7537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14, r8, r9 @ r14= s2 = x[12] - x[13] 7547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r9, r14, r9, LSL #1 @ r9 = x[12] + x[13] -> x[29] 7557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r4!,{r6,r9,r11,r12} 7567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1!,{r5,r7,r10,r14} 7577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 7587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterfly16 (1st version) 7597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ block 1 7607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r1,r1,#16*4 7617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r4,r1,#8*4 7627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA 
r4,{r5,r6,r9,r10} @ r5 = x[ 8] 7637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[ 9] 7647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[10] 7657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[11] 7667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r7,r8,r11,r12} @ r7 = x[0] 7677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[1] 7687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[2] 7697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[3] 7707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r6 @ r5 = s0 = x[ 8] - x[ 9] 7717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r5, r6, LSL #1 @ r6 = x[ 8] + x[ 9] -> x[ 8] 7727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r9, r10 @ r9 = s1 = x[10] - x[11] 7737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r9, r10,LSL #1 @ r10= x[10] + x[11] -> x[10] 7747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r8, r8, r7 @ r8 = s2 = x[ 1] - x[ 0] 7757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r7, r8, r7, LSL #1 @ r7 = x[ 1] + x[ 0] -> x[ 9] 7767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,r11 @ r12= s3 = x[ 3] - x[ 2] 7777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r11,r12,r11, LSL #1 @ r11= x[ 3] + x[ 2] -> x[11] 7787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r14,cPI2_8 7797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r4!,{r6,r7,r10,r11} 7807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 7817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r9 @ r5 = s0 - s1 7827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r9, r5, r9, LSL #1 @ r9 = s0 + s1 7837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r5, r14,r5 @ (r6,r5) = (s0-s1)*cPI2_8 7847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,r8 @ r12= s3 - s2 7857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r12,r8, LSL #1 @ 
r8 = s3 + s2 7867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 7877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r8, r14,r8 @ (r6,r8) = (s3+s2)*cPI2_8 7887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r5, r5, LSL #1 7897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r9, r14,r9 @ (r6,r9) = (s0+s1)*cPI2_8 7907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r8, r8, LSL #1 7917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r12,r14,r12 @ (r6,r12) = (s3-s2)*cPI2_8 7927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r9, r9, LSL #1 7937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 7947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1!,{r5,r8,r9,r12} 7957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 7967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ block4 7977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r4,{r5,r6,r9,r10} @ r5 = x[12] 7987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[13] 7997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[14] 8007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[15] 8017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r7,r8,r11,r12} @ r7 = x[ 4] 8027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[ 5] 8037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[ 6] 8047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[ 7] 8057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14,r7, r8 @ r14= s0 = x[ 4] - x[ 5] 8067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r14,r8, LSL #1 @ r8 = x[ 4] + x[ 5] -> x[13] 8077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r7, r12,r11 @ r7 = s1 = x[ 7] - x[ 6] 8087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r11,r7, r11, LSL #1 @ r11= x[ 7] + x[ 6] -> x[15] 8097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r6 @ r5 = s2 = x[12] - x[13] 
8107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r5, r6, LSL #1 @ r6 = x[12] + x[13] -> x[12] 8117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r9, r10 @ r12= s3 = x[14] - x[15] 8127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r12,r10,LSL #1 @ r10= x[14] + x[15] -> x[14] 8137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r4!,{r6,r8,r10,r11} 8147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1!,{r5,r7,r12,r14} 8157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 8167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterfly_8 8177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMDB r1,{r6,r7,r8,r9,r10,r11,r12,r14} 8187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[0] 8197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r7 = x[1] 8207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[2] 8217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[3] 8227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[4] 8237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[5] 8247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[6] 8257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= x[7] 8267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r6, r7 @ r6 = s0 = x[0] + x[1] 8277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r7, r6, r7, LSL #1 @ r7 = s1 = x[0] - x[1] 8287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r8, r9 @ r8 = s2 = x[2] + x[3] 8297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r8, r9, LSL #1 @ r9 = s3 = x[2] - x[3] 8307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r10,r11 @ r10= s4 = x[4] + x[5] 8317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r10,r11,LSL #1 @ r11= s5 = x[4] - x[5] 8327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r12,r14 @ r12= s6 = x[6] + x[7] 8337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14,r12,r14,LSL #1 @ 
r14= s7 = x[6] - x[7] 8347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 8357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r2, r11,r9 @ r2 = x[0] = s5 + s3 8367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r4, r2, r9, LSL #1 @ r4 = x[2] = s5 - s3 8377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r3, r14,r7 @ r3 = x[1] = s7 - s1 8387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r5, r3, r7, LSL #1 @ r5 = x[3] = s7 + s1 8397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r10,r10,r6 @ r10= x[4] = s4 - s0 8407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r12,r8 @ r11= x[5] = s6 - s2 8417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r10,r6, LSL #1 @ r12= x[6] = s4 + s0 8427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r14,r11,r8, LSL #1 @ r14= x[7] = s6 + s2 8437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMDB r1,{r2,r3,r4,r5,r10,r11,r12,r14} 8447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 8457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterfly_8 8467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r6,r7,r8,r9,r10,r11,r12,r14} 8477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[0] 8487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r7 = x[1] 8497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[2] 8507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[3] 8517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[4] 8527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[5] 8537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[6] 8547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= x[7] 8557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r6, r7 @ r6 = s0 = x[0] + x[1] 8567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r7, r6, r7, LSL #1 @ r7 = s1 = x[0] - x[1] 8577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r8, r9 @ r8 = s2 = x[2] + x[3] 
8587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r8, r9, LSL #1 @ r9 = s3 = x[2] - x[3] 8597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r10,r11 @ r10= s4 = x[4] + x[5] 8607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r10,r11,LSL #1 @ r11= s5 = x[4] - x[5] 8617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r12,r14 @ r12= s6 = x[6] + x[7] 8627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14,r12,r14,LSL #1 @ r14= s7 = x[6] - x[7] 8637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 8647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r2, r11,r9 @ r2 = x[0] = s5 + s3 8657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r4, r2, r9, LSL #1 @ r4 = x[2] = s5 - s3 8667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r3, r14,r7 @ r3 = x[1] = s7 - s1 8677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r5, r3, r7, LSL #1 @ r5 = x[3] = s7 + s1 8687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r10,r10,r6 @ r10= x[4] = s4 - s0 8697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r12,r8 @ r11= x[5] = s6 - s2 8707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r10,r6, LSL #1 @ r12= x[6] = s4 + s0 8717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r14,r11,r8, LSL #1 @ r14= x[7] = s6 + s2 8727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1,{r2,r3,r4,r5,r10,r11,r12,r14} 8737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 8747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ block 2 8757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r1,r1,#16*4-8*4 8767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r4,r1,#8*4 8777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r4,{r5,r6,r9,r10} @ r5 = x[ 8] 8787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[ 9] 8797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[10] 8807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[11] 
8817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r7,r8,r11,r12} @ r7 = x[0] 8827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[1] 8837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[2] 8847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[3] 8857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r6 @ r5 = s0 = x[ 8] - x[ 9] 8867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r5, r6, LSL #1 @ r6 = x[ 8] + x[ 9] -> x[ 8] 8877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r9, r10 @ r9 = s1 = x[10] - x[11] 8887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r9, r10,LSL #1 @ r10= x[10] + x[11] -> x[10] 8897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r8, r8, r7 @ r8 = s2 = x[ 1] - x[ 0] 8907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r7, r8, r7, LSL #1 @ r7 = x[ 1] + x[ 0] -> x[ 9] 8917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,r11 @ r12= s3 = x[ 3] - x[ 2] 8927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r11,r12,r11, LSL #1 @ r11= x[ 3] + x[ 2] -> x[11] 8937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDR r14,cPI2_8 8947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r4!,{r6,r7,r10,r11} 8957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 8967913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r9 @ r5 = s0 - s1 8977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r9, r5, r9, LSL #1 @ r9 = s0 + s1 8987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r5, r14,r5 @ (r6,r5) = (s0-s1)*cPI2_8 8997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r12,r12,r8 @ r12= s3 - s2 9007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r12,r8, LSL #1 @ r8 = s3 + s2 9017913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9027913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r8, r14,r8 @ (r6,r8) = (s3+s2)*cPI2_8 9037913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r5, 
r5, LSL #1 9047913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r9, r14,r9 @ (r6,r9) = (s0+s1)*cPI2_8 9057913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r8, r8, LSL #1 9067913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SMULL r6, r12,r14,r12 @ (r6,r12) = (s3-s2)*cPI2_8 9077913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r9, r9, LSL #1 9087913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang MOV r12,r12,LSL #1 9097913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1!,{r5,r8,r9,r12} 9107913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9117913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ block4 9127913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r4,{r5,r6,r9,r10} @ r5 = x[12] 9137913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[13] 9147913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[14] 9157913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[15] 9167913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r7,r8,r11,r12} @ r7 = x[ 4] 9177913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[ 5] 9187913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[ 6] 9197913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[ 7] 9207913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r5, r5, r6 @ r5 = s2 = x[12] - x[13] 9217913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r5, r6, LSL #1 @ r6 = x[12] + x[13] -> x[12] 9227913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r9, r10 @ r9 = s3 = x[14] - x[15] 9237913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r9, r10,LSL #1 @ r10= x[14] + x[15] -> x[14] 9247913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14,r7, r8 @ r14= s0 = x[ 4] - x[ 5] 9257913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r14,r8, LSL #1 @ r8 = x[ 4] + x[ 5] -> x[13] 9267913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r7, r12,r11 @ r7 = s1 = x[ 7] - x[ 6] 9277913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 
ADD r11,r7, r11, LSL #1 @ r11= x[ 7] + x[ 6] -> x[15] 9287913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r4!,{r6,r8,r10,r11} 9297913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1!,{r5,r7,r9,r14} 9307913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9317913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterfly_8 9327913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMDB r1,{r6,r7,r8,r9,r10,r11,r12,r14} 9337913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[0] 9347913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r7 = x[1] 9357913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[2] 9367913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[3] 9377913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[4] 9387913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[5] 9397913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[6] 9407913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= x[7] 9417913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r6, r7 @ r6 = s0 = x[0] + x[1] 9427913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r7, r6, r7, LSL #1 @ r7 = s1 = x[0] - x[1] 9437913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r8, r9 @ r8 = s2 = x[2] + x[3] 9447913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r8, r9, LSL #1 @ r9 = s3 = x[2] - x[3] 9457913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r10,r11 @ r10= s4 = x[4] + x[5] 9467913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r10,r11,LSL #1 @ r11= s5 = x[4] - x[5] 9477913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r12,r14 @ r12= s6 = x[6] + x[7] 9487913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14,r12,r14,LSL #1 @ r14= s7 = x[6] - x[7] 9497913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9507913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r2, r11,r9 @ r2 = x[0] = s5 + s3 9517913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r4, r2, r9, LSL #1 @ r4 = x[2] = 
s5 - s3 9527913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r3, r14,r7 @ r3 = x[1] = s7 - s1 9537913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r5, r3, r7, LSL #1 @ r5 = x[3] = s7 + s1 9547913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r10,r10,r6 @ r10= x[4] = s4 - s0 9557913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r12,r8 @ r11= x[5] = s6 - s2 9567913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r10,r6, LSL #1 @ r12= x[6] = s4 + s0 9577913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r14,r11,r8, LSL #1 @ r14= x[7] = s6 + s2 9587913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMDB r1,{r2,r3,r4,r5,r10,r11,r12,r14} 9597913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9607913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ mdct_butterfly_8 9617913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMIA r1,{r6,r7,r8,r9,r10,r11,r12,r14} 9627913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r6 = x[0] 9637913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r7 = x[1] 9647913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r8 = x[2] 9657913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r9 = x[3] 9667913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r10= x[4] 9677913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r11= x[5] 9687913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r12= x[6] 9697913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r14= x[7] 9707913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r6, r6, r7 @ r6 = s0 = x[0] + x[1] 9717913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r7, r6, r7, LSL #1 @ r7 = s1 = x[0] - x[1] 9727913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r8, r8, r9 @ r8 = s2 = x[2] + x[3] 9737913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r9, r8, r9, LSL #1 @ r9 = s3 = x[2] - x[3] 9747913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r10,r10,r11 @ r10= s4 = x[4] + x[5] 9757913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r10,r11,LSL 
#1 @ r11= s5 = x[4] - x[5] 9767913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r12,r14 @ r12= s6 = x[6] + x[7] 9777913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r14,r12,r14,LSL #1 @ r14= s7 = x[6] - x[7] 9787913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9797913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r2, r11,r9 @ r2 = x[0] = s5 + s3 9807913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r4, r2, r9, LSL #1 @ r4 = x[2] = s5 - s3 9817913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r3, r14,r7 @ r3 = x[1] = s7 - s1 9827913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r5, r3, r7, LSL #1 @ r5 = x[3] = s7 + s1 9837913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r10,r10,r6 @ r10= x[4] = s4 - s0 9847913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUB r11,r12,r8 @ r11= x[5] = s6 - s2 9857913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r12,r10,r6, LSL #1 @ r12= x[6] = s4 + s0 9867913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r14,r11,r8, LSL #1 @ r14= x[7] = s6 + s2 9877913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang STMIA r1,{r2,r3,r4,r5,r10,r11,r12,r14} 9887913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9897913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang ADD r1,r1,#8*4 9907913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang SUBS r0,r0,#64 9917913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang BGT mdct_bufferflies_loop3 9927913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9937913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang LDMFD r13,{r0-r3} 9947913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang 9957913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wangmdct_bitreverseARM: 996021523c8f11a487b993a1bce5304752b21754574Gloria Wang @ r0 = points = n 9977913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r1 = in 9987913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r2 = step 9997913073ddf11ca3dd7b0439998e1b17d443bb0baGloria Wang @ r3 = shift 10007913073ddf11ca3dd7b0439998e1b17d443bb0baGloria 
@ Bit-reversal pass.  Walks w downwards from x + (n>>1) - 2 words to x and,
@ for each step, swaps the word pair at w with the pair at
@ xx = x + (bitrev[bit] >> shift) whenever w lies above xx.
@ In:  r0 = points (n), r1 = in (x), r3 = shift
@ Uses r4-r12, r14 as scratch.
@ Addresses are compared with unsigned conditions (HI) so the result is
@ correct wherever the buffer sits in the 32-bit address space.

        MOV     r4, #0                  @ r4 = bit = 0
        ADD     r5, r1, r0, LSL #1      @ r5 = w = x + (n>>1)
        ADR     r6, bitrev              @ r6 = 6-bit reversal table
        SUB     r5, r5, #8              @ w -= 2 (point at the last pair)
brev_lp:
        @ Build the reversed index from two 6-bit table lookups.
        LDRB    r7, [r6, r4, LSR #6]    @ r7 = bitrev[bit >> 6]
        AND     r8, r4, #0x3f
        LDRB    r8, [r6, r8]            @ r8 = bitrev[bit & 0x3f]
        ADD     r4, r4, #1              @ bit++
        @ stall XScale
        ORR     r7, r7, r8, LSL #6      @ r7 = bitrev[bit]
        MOV     r7, r7, LSR r3          @ r7 = b >> shift
        ADD     r9, r1, r7, LSL #2      @ r9 = xx = x + (b>>shift)
        CMP     r5, r9                  @ if (w > xx)   (unsigned)
        LDR     r10,[r5],#-8            @ r10 = w[0]    w -= 2
        LDRHI   r11,[r5,#12]            @ r11 = w[1]
        LDRHI   r12,[r9]                @ r12 = xx[0]
        LDRHI   r14,[r9,#4]             @ r14 = xx[1]
        STRHI   r10,[r9]                @ xx[0]= w[0]
        STRHI   r11,[r9,#4]             @ xx[1]= w[1]
        STRHI   r12,[r5,#8]             @ w[0] = xx[0]
        STRHI   r14,[r5,#12]            @ w[1] = xx[1]
        CMP     r5, r1                  @ while (w > x) (unsigned)
        BHI     brev_lp
@ mdct_step7
@ In:  r0 = points
@      r1 = in
@      r2 = step
@      r3 = shift
@ Two loops combine pairs from the bottom (w0, ascending from r1) and the
@ top (w1, descending from x + (n>>1)) of the buffer.  Loop 1 walks the
@ sincos table upwards until T reaches Ttop; loop 2 walks it back down
@ until w0 meets w1.  r0, r3 and r4 are clobbered as multiply scratch and
@ r0-r3 are reloaded from the stack afterwards.
@ Address comparisons use unsigned conditions (LO).

        CMP     r2, #4                  @ r5 = T = (step>=4) ?
        ADR     r7, .Lsincos_lookup     @          sincos_lookup0 +
        ADDLT   r7, #4                  @          sincos_lookup1
        LDR     r5, [r7]                @ r5 = table offset relative to r7
        ADD     r5, r7                  @ r5 = absolute table address
        ADD     r7, r1, r0, LSL #1      @ r7 = w1 = x + (n>>1)
        ADDGE   r5, r5, r2, LSL #1      @ (step>>1)
        ADD     r8, r5, #1024*4         @ r8 = Ttop
step7_loop1:
        LDR     r6, [r1]                @ r6 = w0[0]
        LDR     r9, [r1,#4]             @ r9 = w0[1]
        LDR     r10,[r7,#-8]!           @ r10= w1[0]    w1 -= 2
        LDR     r11,[r7,#4]             @ r11= w1[1]
        LDR     r14,[r5,#4]             @ r14= T[1]
        LDR     r12,[r5],r2,LSL #2      @ r12= T[0]     T += step

        ADD     r6, r6, r10             @ r6 = s0 = w0[0] + w1[0]
        SUB     r10,r6, r10,LSL #1      @ r10= s1b= w0[0] - w1[0]
        SUB     r11,r11,r9              @ r11= s1 = w1[1] - w0[1]
        ADD     r9, r11,r9, LSL #1      @ r9 = s0b= w1[1] + w0[1]

        @ Can save 1 cycle by using SMULL SMLAL - at the cost of being
        @ 1 off.
        SMULL   r0, r3, r6, r14         @ r3 = hi(s0*T[1])
        SMULL   r0, r4, r11,r12         @ r4 = hi(s1*T[0])
        ADD     r3, r3, r4              @ r3 = s2 = hi(s0*T[1]) + hi(s1*T[0])
        SMULL   r0, r14,r11,r14         @ r14= hi(s1*T[1])
        SMULL   r0, r12,r6, r12         @ r12= hi(s0*T[0])
        SUB     r14,r14,r12             @ r14= s3 = hi(s1*T[1]) - hi(s0*T[0])

        @ r9 = s0b<<1
        @ r10= s1b<<1
        ADD     r9, r3, r9, ASR #1      @ r9 = s0b + s2
        SUB     r3, r9, r3, LSL #1      @ r3 = s0b - s2

        SUB     r12,r14,r10,ASR #1      @ r12= s3 - s1b
        ADD     r10,r14,r10,ASR #1      @ r10= s3 + s1b
        STR     r9, [r1],#4
        STR     r10,[r1],#4             @ w0 += 2
        STR     r3, [r7]
        STR     r12,[r7,#4]

        CMP     r5, r8                  @ while (T < Ttop) (unsigned)
        BLO     step7_loop1

step7_loop2:
        LDR     r6, [r1]                @ r6 = w0[0]
        LDR     r9, [r1,#4]             @ r9 = w0[1]
        LDR     r10,[r7,#-8]!           @ r10= w1[0]    w1 -= 2
        LDR     r11,[r7,#4]             @ r11= w1[1]
        LDR     r14,[r5,-r2,LSL #2]!    @ T -= step     r14= T[0]
        LDR     r12,[r5,#4]             @ r12= T[1]

        ADD     r6, r6, r10             @ r6 = s0 = w0[0] + w1[0]
        SUB     r10,r6, r10,LSL #1      @ r10= s1b= w0[0] - w1[0]
        SUB     r11,r11,r9              @ r11= s1 = w1[1] - w0[1]
        ADD     r9, r11,r9, LSL #1      @ r9 = s0b= w1[1] + w0[1]

        @ Can save 1 cycle by using SMULL SMLAL - at the cost of being
        @ 1 off.
        SMULL   r0, r3, r6, r14         @ r3 = hi(s0*T[0])
        SMULL   r0, r4, r11,r12         @ r4 = hi(s1*T[1])
        ADD     r3, r3, r4              @ r3 = s2 = hi(s0*T[0]) + hi(s1*T[1])
        SMULL   r0, r14,r11,r14         @ r14= hi(s1*T[0])
        SMULL   r0, r12,r6, r12         @ r12= hi(s0*T[1])
        SUB     r14,r14,r12             @ r14= s3 = hi(s1*T[0]) - hi(s0*T[1])

        @ r9 = s0b<<1
        @ r10= s1b<<1
        ADD     r9, r3, r9, ASR #1      @ r9 = s0b + s2
        SUB     r3, r9, r3, LSL #1      @ r3 = s0b - s2

        SUB     r12,r14,r10,ASR #1      @ r12= s3 - s1b
        ADD     r10,r14,r10,ASR #1      @ r10= s3 + s1b
        STR     r9, [r1],#4
        STR     r10,[r1],#4             @ w0 += 2
        STR     r3, [r7]
        STR     r12,[r7,#4]

        CMP     r1, r7                  @ while (w0 < w1) (unsigned)
        BLO     step7_loop2

        LDMFD   r13!,{r0-r3}            @ reload r0-r3 saved on the stack

        @ r0 = points
        @ r1 = in
        @ r2 = step
        @ r3 = shift
        MOV     r2, r2, ASR #2          @ r2 = step >>= 2
        CMP     r2, #0
        CMPNE   r2, #1
        BEQ     mdct_end                @ nothing more to do for step 0 or 1

        @ step > 1 (default case)
        CMP     r2, #4                  @ r5 = T = (step>=4) ?
@ Step 8 table selection.  The LT/GE conditions below consume the flags of
@ the CMP r2, #4 that immediately precedes this code.
        ADR     r7, .Lsincos_lookup     @          sincos_lookup0 +
        ADDLT   r7, #4                  @          sincos_lookup1
        LDR     r5, [r7]                @ r5 = table offset relative to r7
        ADD     r5, r7                  @ r5 = absolute table address
        ADD     r7, r1, r0, LSL #1      @ r7 = iX = x + (n>>1)
        ADDGE   r5, r5, r2, LSL #1      @ (step>>1)
mdct_step8_default:
        LDR     r6, [r1],#4             @ r6 =  s0 = x[0]
        LDR     r8, [r1],#4             @ r8 = -s1 = x[1]   (x now advanced by 2)
        LDR     r12,[r5,#4]             @ r12= T[1]
        LDR     r14,[r5],r2,LSL #2      @ r14= T[0]         T += step
        RSB     r8, r8, #0              @ r8 = s1

        @ XPROD31(s0, s1, T[0], T[1], x, x+1)
        @ x[0] = s0 * T[0] + s1 * T[1]      x[1] = s1 * T[0] - s0 * T[1]
        SMULL   r9, r10, r8, r12        @ (r9,r10)  = s1 * T[1]
        CMP     r1, r7                  @ flags for the loop branch; no
                                        @ instruction below updates them
        SMLAL   r9, r10, r6, r14        @ (r9,r10) += s0 * T[0]
        RSB     r6, r6, #0              @ r6 = -s0
        SMULL   r9, r11, r8, r14        @ (r9,r11)  = s1 * T[0]
        MOV     r10,r10,LSL #1
        SMLAL   r9, r11, r6, r12        @ (r9,r11) -= s0 * T[1]
        STR     r10,[r1,#-8]            @ store new x[0]
        MOV     r11,r11,LSL #1
        STR     r11,[r1,#-4]            @ store new x[1]
        BLO     mdct_step8_default      @ while (x < iX) (unsigned)

mdct_end:
        MOV     r0, r2                  @ return value = r2
        LDMFD   r13!,{r4-r11,PC}        @ restore r4-r11 and return

        @ 32-bit multiplier constants loaded PC-relative by the code above.
cPI1_8:
        .word   0x7641af3d
cPI2_8:
        .word   0x5a82799a
cPI3_8:
        .word   0x30fbc54d

        @ bitrev[i] = i with its low 6 bits reversed (64 entries).
bitrev:
        .byte    0, 32, 16, 48,  8, 40, 24, 56
        .byte    4, 36, 20, 52, 12, 44, 28, 60
        .byte    2, 34, 18, 50, 10, 42, 26, 58
        .byte    6, 38, 22, 54, 14, 46, 30, 62
        .byte    1, 33, 17, 49,  9, 41, 25, 57
        .byte    5, 37, 21, 53, 13, 45, 29, 61
        .byte    3, 35, 19, 51, 11, 43, 27, 59
        .byte    7, 39, 23, 55, 15, 47, 31, 63

        @ Offsets of the sincos tables relative to the address of each
        @ entry, so the code can locate them with ADR + LDR + ADD.
.Lsincos_lookup:
        .word   sincos_lookup0-.Lsincos_lookup
        .word   sincos_lookup1-(.Lsincos_lookup+4)

        @ END