;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


121b362b15af34006e6a11974088a46d42b903418eJohann    EXPORT  |vp8_short_idct4x4llm_v6_dual|
131b362b15af34006e6a11974088a46d42b903418eJohann
141b362b15af34006e6a11974088a46d42b903418eJohann    AREA    |.text|, CODE, READONLY
151b362b15af34006e6a11974088a46d42b903418eJohann
161b362b15af34006e6a11974088a46d42b903418eJohann
;-----------------------------------------------------------------------
; void vp8_short_idct4x4llm_v6_dual(short *input, unsigned char *pred,
;                                   int pitch, unsigned char *dst,
;                                   int stride)
;
; Takes the same argument list as the C version:
; void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch,
;                             unsigned char *dst, int stride)
;
; 4x4 inverse transform of the 16 coefficients at input.  The result is
; rounded ((x + 4) >> 3), added to the prediction bytes, saturated to
; 0..255 and stored to dst as one 32-bit word (four pixels) per row.
;
; In:
; r0    short* input           (overwritten: holds the pass-1 results)
; r1    unsigned char* pred
; r2    int pitch              (byte step between pred rows)
; r3    unsigned char* dst
; sp    int stride             (byte step between dst rows; read from
;                               [sp, #40] once the prologue has run)
;
; Register roles:
; r4           sinpi8sqrt2 (0x8A8C)
; r5           cospi8sqrt2minus1 (0x4E7B) in the low half,
;              two-iteration loop flag in bit 31
; r6-r12, lr   temporaries
;
; Every working register carries two 16-bit values; uadd16/usub16 operate
; on both halfword lanes at once.
;
; NOTE(review): input is read/written and dst is written with word
; accesses (ldr/str) - confirm the alignment callers guarantee.
;-----------------------------------------------------------------------

|vp8_short_idct4x4llm_v6_dual| PROC
    stmdb   sp!, {r4-r11, lr}       ; save 9 registers (36 bytes)

    sub     sp, sp, #4              ; one scratch word: input pointer for pass 2

    mov     r4, #0x00008A00         ; sin
    orr     r4, r4, #0x0000008C     ; sinpi8sqrt2

    mov     r5, #0x00004E00         ; cos
    orr     r5, r5, #0x0000007B     ; cospi8sqrt2minus1
    orr     r5, r5, #1<<31          ; loop counter on top bit

; Pass 1.  Each iteration loads two adjacent coefficients from each of the
; four rows (byte offsets 0, 8, 16 and 24 from r0), runs the butterfly on
; both halfword lanes and writes the results back over the input.
;
; Loop control: "subs r5, r5, #1<<31" leaves carry set on the first
; iteration (bit 31 set, no borrow) and clear on the second (borrow), so
; the body runs twice.  The second subtraction wraps and sets bit 31 again,
; which re-arms the counter for pass 2.  The smulb* multiplies read only
; the low half of r5, and nothing between subs and bcs updates the
; condition flags (uadd16/usub16 only write the GE bits).
loop1_dual
    ldr     r6, [r0, #(4*2)]        ; i5 | i4
    ldr     r12, [r0, #(12*2)]      ; i13|i12
    ldr     r14, [r0, #(8*2)]       ; i9 | i8

    smulbt  r9, r5, r6              ; ip[5] * cospi8sqrt2minus1 (>> 16 taken by pkhtb)
    smulbb  r7, r5, r6              ; ip[4] * cospi8sqrt2minus1 (>> 16 taken by pkhtb)
    smulwt  r10, r4, r6             ; (ip[5] * sinpi8sqrt2) >> 16
    smulwb  r8, r4, r6              ; (ip[4] * sinpi8sqrt2) >> 16

    smulbt  r11, r5, r12            ; ip[13] * cospi8sqrt2minus1 (>> 16 taken by pkhtb)
    pkhtb   r7, r9, r7, asr #16     ; 5c | 4c
    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
    uadd16  r6, r6, r7              ; 5c+5 | 4c+4

    smulwt  r7, r4, r12             ; (ip[13] * sinpi8sqrt2) >> 16
    smulbb  r9, r5, r12             ; ip[12] * cospi8sqrt2minus1 (>> 16 taken by pkhtb)
    smulwb  r10, r4, r12            ; (ip[12] * sinpi8sqrt2) >> 16

    subs    r5, r5, #1<<31          ; i-- (carry decides the bcs below)

    pkhtb   r9, r11, r9, asr #16    ; 13c | 12c
    ldr     r11, [r0]               ; i1 | i0
    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
    uadd16  r7, r12, r9             ; 13c+13 | 12c+12

    usub16  r7, r8, r7              ; c
    uadd16  r6, r6, r10             ; d
    uadd16  r10, r11, r14           ; a
    usub16  r8, r11, r14            ; b

    uadd16  r9, r10, r6             ; a+d
    usub16  r10, r10, r6            ; a-d
    uadd16  r6, r8, r7              ; b+c
    usub16  r7, r8, r7              ; b-c

    ; use input buffer to store intermediate results
    str      r6, [r0, #(4*2)]       ; o5 | o4
    str      r7, [r0, #(8*2)]       ; o9 | o8
    str      r10,[r0, #(12*2)]      ; o13|o12
    str      r9, [r0], #4           ; o1 | o0, then step r0 to the next two columns

    bcs loop1_dual                  ; carry still set only after the first iteration

    sub     r0, r0, #8              ; reset input/output (undo the two #4 post-increments)
    str     r0, [sp]                ; keep the base pointer: r0 is reused as a temporary below

; Pass 2.  Each iteration takes two rows of pass-1 output (8 halfwords at
; r0) and transforms them with one row per halfword lane: the first row in
; the top halves, the second row in the bottom halves.  It then adds the
; prediction, saturates to 8 bits and stores two rows of dst.
loop2_dual

    ldr     r6, [r0, #(4*2)]        ; i5 | i4
    ldr     r12,[r0, #(2*2)]        ; i3 | i2
    ldr     r14,[r0, #(6*2)]        ; i7 | i6
    ldr     r0, [r0, #(0*2)]        ; i1 | i0 (pointer in r0 is gone; reloaded from [sp] later)

    smulbt  r9, r5, r6              ; ip[5] * cospi8sqrt2minus1 (>> 16 taken by pkhtb)
    smulbt  r7, r5, r0              ; ip[1] * cospi8sqrt2minus1 (>> 16 taken by pkhtb)
    smulwt  r10, r4, r6             ; (ip[5] * sinpi8sqrt2) >> 16
    smulwt  r8, r4, r0              ; (ip[1] * sinpi8sqrt2) >> 16

    pkhbt   r11, r6, r0, lsl #16    ; i0 | i4
    pkhtb   r7, r7, r9, asr #16     ; 1c | 5c
    pkhtb   r0, r0, r6, asr #16     ; i1 | i5
    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1

    uadd16  r0, r7, r0              ; 1c+1 | 5c+5 = temp2
    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6
    uadd16  r10, r11, r9            ; a
    usub16  r9, r11, r9             ; b
    pkhtb   r6, r12, r14, asr #16   ; i3 | i7

    subs    r5, r5, #1<<31          ; i-- (carry decides the bcs at the loop end)

    smulbt  r7, r5, r6              ; ip[3] * cospi8sqrt2minus1 (>> 16 taken by pkhtb)
    smulwt  r11, r4, r6             ; (ip[3] * sinpi8sqrt2) >> 16
    smulbb  r12, r5, r6             ; ip[7] * cospi8sqrt2minus1 (>> 16 taken by pkhtb)
    smulwb  r14, r4, r6             ; (ip[7] * sinpi8sqrt2) >> 16

    pkhtb   r7, r7, r12, asr #16    ; 3c | 7c
    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1

    uadd16  r6, r7, r6              ; 3c+3 | 7c+7 = temp2
    usub16  r12, r8, r6             ; c (o1 | o5)
    uadd16  r6, r11, r0             ; d (o3 | o7)
    uadd16  r7, r10, r6             ; a+d

    mov     r8, #4                  ; set up 4's (rounding term for the >> 3)
    orr     r8, r8, #0x40000        ; 4|4

    usub16  r6, r10, r6             ; a-d
    uadd16  r6, r6, r8              ; a-d+4, 3|7
    uadd16  r7, r7, r8              ; a+d+4, 0|4
    uadd16  r10, r9, r12            ; b+c
    usub16  r0, r9, r12             ; b-c
    uadd16  r10, r10, r8            ; b+c+4, 1|5
    uadd16  r8, r0, r8              ; b-c+4, 2|6

    ldr     lr, [sp, #40]           ; dst stride: 4 (scratch) + 36 (saved regs) above sp

    ldrb    r0, [r1]                ; pred p0
    ldrb    r11, [r1, #1]           ; pred p1
    ldrb    r12, [r1, #2]           ; pred p2

    ; asr #19 = take the top halfword (first row), sign-extended, then >> 3
    add     r0, r0, r7, asr #19     ; p0 + o0
    add     r11, r11, r10, asr #19  ; p1 + o1
    add     r12, r12, r8, asr #19   ; p2 + o2

    usat    r0, #8, r0              ; d0 = clip8(p0 + o0)
    usat    r11, #8, r11            ; d1 = clip8(p1 + o1)
    usat    r12, #8, r12            ; d2 = clip8(p2 + o2)

    add     r0, r0, r11, lsl #8     ; |--|--|d1|d0|

    ldrb    r11, [r1, #3]           ; pred p3

    add     r0, r0, r12, lsl #16    ; |--|d2|d1|d0|

    add     r11, r11, r6, asr #19   ; p3 + o3

    ; sign-extend the bottom halfwords (second row) so asr #3 can be applied
    sxth    r7, r7                  ; a+d+4 for o4
    sxth    r10, r10                ; b+c+4 for o5

    usat    r11, #8, r11            ; d3 = clip8(p3 + o3)

    sxth    r8, r8                  ; b-c+4 for o6
    sxth    r6, r6                  ; a-d+4 for o7

    add     r0, r0, r11, lsl #24    ; |d3|d2|d1|d0|

    ldrb    r12, [r1, r2]!          ; pred p4 (writeback: r1 += pitch)
    str     r0, [r3], lr            ; store first dst row, then dst += stride
    ldrb    r11, [r1, #1]           ; pred p5

    add     r12, r12, r7, asr #3    ; p4 + o4
    add     r11, r11, r10, asr #3   ; p5 + o5

    usat    r12, #8, r12            ; d4 = clip8(p4 + o4)
    usat    r11, #8, r11            ; d5 = clip8(p5 + o5)

    ldrb    r7, [r1, #2]            ; pred p6
    ldrb    r10, [r1, #3]           ; pred p7

    add     r12, r12, r11, lsl #8   ; |--|--|d5|d4|

    add     r7, r7, r8, asr #3      ; p6 + o6
    add     r10, r10, r6, asr #3    ; p7 + o7

    ldr     r0, [sp]                ; load input pointer (base saved before loop2_dual)

    usat    r7, #8, r7              ; d6 = clip8(p6 + o6)
    usat    r10, #8, r10            ; d7 = clip8(p7 + o7)

    add     r12, r12, r7, lsl #16   ; |--|d6|d5|d4|
    add     r12, r12, r10, lsl #24  ; |d7|d6|d5|d4|

    str     r12, [r3], lr           ; store second dst row, then dst += stride
    add     r0, r0, #16             ; input + 16 bytes: the next two rows
    add     r1, r1, r2              ; pred + pitch

    bcs loop2_dual                  ; carry still set only after the first iteration

    add     sp, sp, #4              ; release the scratch word
    ldmia   sp!, {r4 - r11, pc}     ; restore registers and return

    ENDP

2021b362b15af34006e6a11974088a46d42b903418eJohann    END
203