;
;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;
9d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org
10d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    EXPORT  |vp9_idct32x32_1_add_neon|
11d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    ARM
12d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    REQUIRE8
13d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    PRESERVE8
14d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org
15d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    AREA ||.text||, CODE, READONLY, ALIGN=2
16d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org
    ;TODO(hkuang): put the following macros in a separate
    ;file so other idct functions can also use them.
19d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    MACRO
20d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    LD_16x8          $src, $stride
21d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vld1.8           {q8}, [$src], $stride
22d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vld1.8           {q9}, [$src], $stride
23d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vld1.8           {q10}, [$src], $stride
24d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vld1.8           {q11}, [$src], $stride
25d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vld1.8           {q12}, [$src], $stride
26d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vld1.8           {q13}, [$src], $stride
27d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vld1.8           {q14}, [$src], $stride
28d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vld1.8           {q15}, [$src], $stride
29d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    MEND
30d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org
31d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    MACRO
32d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    ADD_DIFF_16x8    $diff
33d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqadd.u8         q8, q8, $diff
34d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqadd.u8         q9, q9, $diff
35d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqadd.u8         q10, q10, $diff
36d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqadd.u8         q11, q11, $diff
37d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqadd.u8         q12, q12, $diff
38d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqadd.u8         q13, q13, $diff
39d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqadd.u8         q14, q14, $diff
40d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqadd.u8         q15, q15, $diff
41d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    MEND
42d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org
43d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    MACRO
44d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    SUB_DIFF_16x8    $diff
45d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqsub.u8         q8, q8, $diff
46d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqsub.u8         q9, q9, $diff
47d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqsub.u8         q10, q10, $diff
48d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqsub.u8         q11, q11, $diff
49d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqsub.u8         q12, q12, $diff
50d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqsub.u8         q13, q13, $diff
51d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqsub.u8         q14, q14, $diff
52d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vqsub.u8         q15, q15, $diff
53d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    MEND
54d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org
55d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    MACRO
56d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    ST_16x8          $dst, $stride
57d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vst1.8           {q8}, [$dst], $stride
58d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vst1.8           {q9}, [$dst], $stride
59d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vst1.8           {q10},[$dst], $stride
60d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vst1.8           {q11},[$dst], $stride
61d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vst1.8           {q12},[$dst], $stride
62d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vst1.8           {q13},[$dst], $stride
63d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vst1.8           {q14},[$dst], $stride
64d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    vst1.8           {q15},[$dst], $stride
65d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    MEND
66d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org
;void vp9_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,
;                              int dest_stride)
;
; Computes a single value a1 from input[0] (two rounds of multiplication
; by cospi_16_64 with a rounding shift by 14, then a rounding shift by 6)
; and adds it, with unsigned 8-bit saturation, to every byte of the
; 32-row by 32-byte block at dest. No other input coefficient is read.
;
; The block is handled as two 16-byte-wide halves of 32 rows each: the
; left half first, then the right half starting at dest + 16.
;
; r0  int16_t *input      (only input[0] is read; r0 is then reused)
; r1  uint8_t *dest
; r2  int dest_stride
;
; Clobbers: r0, r1, r3, r12, q0, q8-q15 and the condition flags.
; sp and lr are not modified, so the function returns with "bx lr".

|vp9_idct32x32_1_add_neon| PROC
    pld              [r1]
    add              r3, r1, #16               ; r3 = dest + 16, start of the right half
    ldrsh            r0, [r0]                  ; r0 = input[0], sign-extended

    ; generate cospi_16_64 = 11585
    mov              r12, #0x2d00
    add              r12, #0x41

    ; out = dct_const_round_shift(input[0] * cospi_16_64)
    mul              r0, r0, r12               ; input[0] * cospi_16_64
    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
    asr              r0, r0, #14               ; >> DCT_CONST_BITS

    ; out = dct_const_round_shift(out * cospi_16_64)
    mul              r0, r0, r12               ; out * cospi_16_64
    mov              r12, r1                   ; r12 = store pointer, r1 = load pointer
    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
    asr              r0, r0, #14               ; >> DCT_CONST_BITS

    ; a1 = ROUND_POWER_OF_TWO(out, 6)
    add              r0, r0, #32               ; + (1 <<((6) - 1))
    asrs             r0, r0, #6                ; >> 6; updates N, Z, C but leaves V alone

    ; Dispatch on the sign of a1. This must test N only (pl/mi): "ge"
    ; compares N with V, and V still holds whatever the caller left in
    ; it because no instruction above writes it.
    bpl              diff_positive_32_32

diff_negative_32_32
    ; a1 < 0: subtract |a1| (clamped to 0..255) from every pixel.
    neg              r0, r0
    usat             r0, #8, r0
    vdup.u8          q0, r0
    mov              r0, #4                    ; 4 passes of 16 rows: two per half

diff_negative_32_32_loop
    sub              r0, #1
    LD_16x8          r1, r2
    SUB_DIFF_16x8    q0
    ST_16x8          r12, r2

    LD_16x8          r1, r2
    SUB_DIFF_16x8    q0
    ST_16x8          r12, r2
    cmp              r0, #2                    ; two passes done: left half finished
    moveq            r1, r3                    ; restart loads at dest + 16
    moveq            r12, r3                   ; restart stores at dest + 16
    cmp              r0, #0
    bne              diff_negative_32_32_loop
    bx               lr

diff_positive_32_32
    ; a1 >= 0: add a1 (clamped to 0..255) to every pixel.
    usat             r0, #8, r0
    vdup.u8          q0, r0
    mov              r0, #4                    ; 4 passes of 16 rows: two per half

diff_positive_32_32_loop
    sub              r0, #1
    LD_16x8          r1, r2
    ADD_DIFF_16x8    q0
    ST_16x8          r12, r2

    LD_16x8          r1, r2
    ADD_DIFF_16x8    q0
    ST_16x8          r12, r2
    cmp              r0, #2                    ; two passes done: left half finished
    moveq            r1, r3                    ; restart loads at dest + 16
    moveq            r12, r3                   ; restart stores at dest + 16
    cmp              r0, #0
    bne              diff_positive_32_32_loop
    bx               lr

    ENDP             ; |vp9_idct32x32_1_add_neon|
144d851b91d14ef0bd71acdce7b90c9a8f1af1181adjohannkoenig@chromium.org    END
145