;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_short_idct4x4llm_v6_dual|

    AREA    |.text|, CODE, READONLY


; void vp8_short_idct4x4llm_v6_dual(short *input, unsigned char *pred,
;                                   int pitch, unsigned char *dst, int stride)
;
; 4x4 inverse transform (per the routine's name) of 16 halfword coefficients.
; The result is rounded ((x + 4) >> 3), added to a 4x4 block of prediction
; bytes, saturated to 8 bits and written to dst one 32-bit word per row.
;
; r0    short* input          16 coefficients; OVERWRITTEN with the first-pass
;                             results (used as the intermediate buffer)
; r1    unsigned char* pred   prediction block, read bytewise, rows 'pitch' apart
; r2    int pitch
; r3    unsigned char* dst    written with word stores, rows 'stride' apart
; sp    int stride            first stack argument
;
; "dual": every packed (halfword-pair) operation works on two coefficients at
; once, so each pass is a loop of exactly two iterations.
;
; Register use throughout:
;   r4        sinpi8sqrt2       = 0x8A8C (35468); too large for a signed
;                                 halfword, hence the 32x16 smulw* multiplies
;   r5[15:0]  cospi8sqrt2minus1 = 0x4E7B (20091)
;   r5[31]    loop counter: "subs r5, r5, #1<<31" leaves C set the first time
;             (no borrow, bit cleared) and C clear the second time (borrow,
;             bit set again, so r5 is re-armed for the next loop).  No
;             instruction between a subs and its bcs carries the S suffix, so
;             the carry reaches the branch intact.
;
; Stack frame after the prologue:
;   [sp]       scratch word holding the input pointer for the second pass
;   [sp, #4]   saved r4-r11, lr (9 words = 36 bytes)
;   [sp, #40]  stride

|vp8_short_idct4x4llm_v6_dual| PROC
    stmdb   sp!, {r4-r11, lr}

    sub     sp, sp, #4              ; scratch word for the input pointer

    mov     r4, #0x00008A00         ; sin
    orr     r4, r4, #0x0000008C     ; sinpi8sqrt2

    mov     r5, #0x00004E00         ; cos
    orr     r5, r5, #0x0000007B     ; cospi8sqrt2minus1
    orr     r5, r5, #1<<31          ; loop counter on top bit

; First pass: each iteration transforms two adjacent columns (rows are 4
; halfwords = 8 bytes apart) and writes the result back over the input.
loop1_dual
    ldr     r6, [r0, #(4*2)]        ; i5 | i4
    ldr     r12, [r0, #(12*2)]      ; i13|i12
    ldr     r14, [r0, #(8*2)]       ; i9 | i8

    smulbt  r9, r5, r6              ; ip[5] * cospi8sqrt2minus1 (>>16 by pkhtb)
    smulbb  r7, r5, r6              ; ip[4] * cospi8sqrt2minus1 (>>16 by pkhtb)
    smulwt  r10, r4, r6             ; (ip[5] * sinpi8sqrt2) >> 16
    smulwb  r8, r4, r6              ; (ip[4] * sinpi8sqrt2) >> 16

    smulbt  r11, r5, r12            ; ip[13] * cospi8sqrt2minus1 (>>16 by pkhtb)
    pkhtb   r7, r9, r7, asr #16     ; 5c | 4c
    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
    uadd16  r6, r6, r7              ; 5c+5 | 4c+4

    smulwt  r7, r4, r12             ; (ip[13] * sinpi8sqrt2) >> 16
    smulbb  r9, r5, r12             ; ip[12] * cospi8sqrt2minus1 (>>16 by pkhtb)
    smulwb  r10, r4, r12            ; (ip[12] * sinpi8sqrt2) >> 16

    subs    r5, r5, #1<<31          ; i-- (sets C for the bcs below)

    pkhtb   r9, r11, r9, asr #16    ; 13c | 12c
    ldr     r11, [r0]               ; i1 | i0
    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
    uadd16  r7, r12, r9             ; 13c+13 | 12c+12

    usub16  r7, r8, r7              ; c
    uadd16  r6, r6, r10             ; d
    uadd16  r10, r11, r14           ; a
    usub16  r8, r11, r14            ; b

    uadd16  r9, r10, r6             ; a+d
    usub16  r10, r10, r6            ; a-d
    uadd16  r6, r8, r7              ; b+c
    usub16  r7, r8, r7              ; b-c

    ; use input buffer to store intermediate results
    str     r6, [r0, #(4*2)]        ; o5 | o4
    str     r7, [r0, #(8*2)]        ; o9 | o8
    str     r10,[r0, #(12*2)]       ; o13|o12
    str     r9, [r0], #4            ; o1 | o0, then step to the next column pair

    bcs     loop1_dual

    sub     r0, r0, #8              ; undo the two 4-byte steps: reset input/output
    str     r0, [sp]                ; r0 is reused as scratch below; keep a copy

; Second pass: each iteration transforms two adjacent rows (8 halfwords at
; r0).  In the packed registers the top halfword belongs to the first row of
; the pair and the bottom halfword to the second.
loop2_dual

    ldr     r6, [r0, #(4*2)]        ; i5 | i4
    ldr     r12,[r0, #(2*2)]        ; i3 | i2
    ldr     r14,[r0, #(6*2)]        ; i7 | i6
    ldr     r0, [r0, #(0*2)]        ; i1 | i0 (pointer is lost; reloaded from [sp])

    smulbt  r9, r5, r6              ; ip[5] * cospi8sqrt2minus1 (>>16 by pkhtb)
    smulbt  r7, r5, r0              ; ip[1] * cospi8sqrt2minus1 (>>16 by pkhtb)
    smulwt  r10, r4, r6             ; (ip[5] * sinpi8sqrt2) >> 16
    smulwt  r8, r4, r0              ; (ip[1] * sinpi8sqrt2) >> 16

    pkhbt   r11, r6, r0, lsl #16    ; i0 | i4
    pkhtb   r7, r7, r9, asr #16     ; 1c | 5c
    pkhtb   r0, r0, r6, asr #16     ; i1 | i5
    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1

    uadd16  r0, r7, r0              ; 1c+1 | 5c+5 = temp2
    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6
    uadd16  r10, r11, r9            ; a
    usub16  r9, r11, r9             ; b
    pkhtb   r6, r12, r14, asr #16   ; i3 | i7

    subs    r5, r5, #1<<31          ; i-- (sets C for the bcs at the loop end)

    smulbt  r7, r5, r6              ; ip[3] * cospi8sqrt2minus1 (>>16 by pkhtb)
    smulwt  r11, r4, r6             ; (ip[3] * sinpi8sqrt2) >> 16
    smulbb  r12, r5, r6             ; ip[7] * cospi8sqrt2minus1 (>>16 by pkhtb)
    smulwb  r14, r4, r6             ; (ip[7] * sinpi8sqrt2) >> 16

    pkhtb   r7, r7, r12, asr #16    ; 3c | 7c
    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1

    uadd16  r6, r7, r6              ; 3c+3 | 7c+7 = temp2
    usub16  r12, r8, r6             ; c = (1s | 5s) - (3c+3 | 7c+7)
    uadd16  r6, r11, r0             ; d = (3s | 7s) + (1c+1 | 5c+5)
    uadd16  r7, r10, r6             ; a+d

    mov     r8, #4                  ; set up 4's (rounding term for >> 3)
    orr     r8, r8, #0x40000        ; 4|4

    usub16  r6, r10, r6             ; a-d
    uadd16  r6, r6, r8              ; a-d+4, 3|7
    uadd16  r7, r7, r8              ; a+d+4, 0|4
    uadd16  r10, r9, r12            ; b+c
    usub16  r0, r9, r12             ; b-c
    uadd16  r10, r10, r8            ; b+c+4, 1|5
    uadd16  r8, r0, r8              ; b-c+4, 2|6

    ; lr (r14) was used as scratch above, so stride is reloaded every pass.
    ; 40 = 4 (scratch word) + 9 saved registers * 4.
    ldr     lr, [sp, #40]           ; dst stride

    ; First row of the pair: results live in the top halfwords, so
    ; "asr #19" is sign-extend + ">> 3" in one shift.
    ldrb    r0, [r1]                ; pred p0
    ldrb    r11, [r1, #1]           ; pred p1
    ldrb    r12, [r1, #2]           ; pred p2

    add     r0, r0, r7, asr #19     ; p0 + o0
    add     r11, r11, r10, asr #19  ; p1 + o1
    add     r12, r12, r8, asr #19   ; p2 + o2

    usat    r0, #8, r0              ; d0 = clip8(p0 + o0)
    usat    r11, #8, r11            ; d1 = clip8(p1 + o1)
    usat    r12, #8, r12            ; d2 = clip8(p2 + o2)

    add     r0, r0, r11, lsl #8     ; |--|--|d1|d0|

    ldrb    r11, [r1, #3]           ; pred p3

    add     r0, r0, r12, lsl #16    ; |--|d2|d1|d0|

    add     r11, r11, r6, asr #19   ; p3 + o3

    ; Second row of the pair: sign-extend the bottom halfwords so that a
    ; plain "asr #3" can be applied below.
    sxth    r7, r7                  ; o4 (still + 4)
    sxth    r10, r10                ; o5 (still + 4)

    usat    r11, #8, r11            ; d3 = clip8(p3 + o3)

    sxth    r8, r8                  ; o6 (still + 4)
    sxth    r6, r6                  ; o7 (still + 4)

    add     r0, r0, r11, lsl #24    ; |d3|d2|d1|d0|

    ldrb    r12, [r1, r2]!          ; pred p4 (writeback: pred += pitch)
    str     r0, [r3], lr            ; store row, dst += stride
    ldrb    r11, [r1, #1]           ; pred p5

    add     r12, r12, r7, asr #3    ; p4 + o4
    add     r11, r11, r10, asr #3   ; p5 + o5

    usat    r12, #8, r12            ; d4 = clip8(p4 + o4)
    usat    r11, #8, r11            ; d5 = clip8(p5 + o5)

    ldrb    r7, [r1, #2]            ; pred p6
    ldrb    r10, [r1, #3]           ; pred p7

    add     r12, r12, r11, lsl #8   ; |--|--|d5|d4|

    add     r7, r7, r8, asr #3      ; p6 + o6
    add     r10, r10, r6, asr #3    ; p7 + o7

    ; The saved pointer is never updated; "saved + 16" below is only right
    ; because this loop runs exactly twice.
    ldr     r0, [sp]                ; load input pointer

    usat    r7, #8, r7              ; d6 = clip8(p6 + o6)
    usat    r10, #8, r10            ; d7 = clip8(p7 + o7)

    add     r12, r12, r7, lsl #16   ; |--|d6|d5|d4|
    add     r12, r12, r10, lsl #24  ; |d7|d6|d5|d4|

    str     r12, [r3], lr           ; store row, dst += stride
    add     r0, r0, #16             ; next pair of rows (8 halfwords)
    add     r1, r1, r2              ; pred + pitch

    bcs     loop2_dual

    add     sp, sp, #4              ; release the input-pointer scratch word
    ldmia   sp!, {r4 - r11, pc}

    ENDP

    END