1233d2500723e5594f3e7c70896ffeeef32b9c950ywan;
2233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3233d2500723e5594f3e7c70896ffeeef32b9c950ywan;
4233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  Use of this source code is governed by a BSD-style license
5233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  that can be found in the LICENSE file in the root of the source
6233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  tree. An additional intellectual property rights grant can be found
7233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  in the file PATENTS.  All contributing project authors may
8233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  be found in the AUTHORS file in the root of the source tree.
9233d2500723e5594f3e7c70896ffeeef32b9c950ywan;
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan
; Make the routine's entry point visible to the linker.
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan    EXPORT  |vp8_short_idct4x4llm_neon|
; Assemble what follows as ARM-state (32-bit) instructions.
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan    ARM
; Stack-alignment attributes: this code requires, and in turn preserves,
; 8-byte alignment of the stack pointer.
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan    REQUIRE8
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan    PRESERVE8
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan
; Read-only code section; ALIGN=2 requests 2^2 = 4-byte section alignment.
17233d2500723e5594f3e7c70896ffeeef32b9c950ywan    AREA ||.text||, CODE, READONLY, ALIGN=2
18233d2500723e5594f3e7c70896ffeeef32b9c950ywan
;*************************************************************
;void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch,
;                            unsigned char *dst, int stride)
;r0   short * input
;r1   unsigned char * pred
;r2   int pitch
;r3   unsigned char * dst
;[sp] int stride (first stack argument on entry)
;*************************************************************
28233d2500723e5594f3e7c70896ffeeef32b9c950ywan
29233d2500723e5594f3e7c70896ffeeef32b9c950ywan; static const int cospi8sqrt2minus1=20091;
30233d2500723e5594f3e7c70896ffeeef32b9c950ywan; static const int sinpi8sqrt2      =35468;
31233d2500723e5594f3e7c70896ffeeef32b9c950ywan; static const int rounding = 0;
32233d2500723e5594f3e7c70896ffeeef32b9c950ywan
; Optimization note: The data produced by dequantization are signed
; 13-bit values in the range [-4096, 4095]. This allows the NEON
; "vqdmulh" instruction to be used, since the doubled product cannot go
; out of range (13+16+1=30 bits < 32 bits). The instruction returns the
; high half of the multiplication result, which is what the IDCT needs.
38233d2500723e5594f3e7c70896ffeeef32b9c950ywan
|vp8_short_idct4x4llm_neon| PROC
    ; 4x4 inverse transform of the 16 signed 16-bit coefficients at r0.
    ; The result is added to the 4x4 block of prediction bytes at r1
    ; (row step r2), saturated to unsigned 8 bits and written to r3
    ; (row step = stride, the first stack argument).
    ;
    ; The same 1-D butterfly is run twice with a 4x4 transpose in between;
    ; the second pass is followed by a rounding shift right by 3.
    ;
    ; Clobbers r0, r3, r12 and q0-q3. d8-d13 are used as scratch but are
    ; saved and restored.

    ; d8-d15 are callee-saved under the AAPCS. Six D registers = 48 bytes,
    ; so the stack pointer keeps its 8-byte alignment (PRESERVE8).
    vpush           {d8-d13}

    adr             r12, idct_coeff
    vld1.16         {q1, q2}, [r0]          ; d2..d5 = input[0..3], [4..7], [8..11], [12..15]
    vld1.16         {d0}, [r12]             ; d0[0] = 20091 (cospi8sqrt2minus1), d0[2] = 35468 (sinpi8sqrt2)

    vswp            d3, d4                  ; d3 = input[8..11]; q2 = input[4..7], input[12..15]
    ldr             r0, [sp, #48]           ; stride: first stack argument, above the 48 saved bytes

    ; ---- first pass ----
    ; vqdmulh yields (2*a*b) >> 16; the vshr #1 below leaves (a*b) >> 16.
    vqdmulh.s16     q3, q2, d0[2]           ; x * sinpi8sqrt2 (constant wraps negative in s16)
    vqdmulh.s16     q4, q2, d0[0]           ; x * cospi8sqrt2minus1

    vqadd.s16       d12, d2, d3             ; a1 = input[0..3] + input[8..11]
    vqsub.s16       d13, d2, d3             ; b1 = input[0..3] - input[8..11]

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    ; Add x back in both products:
    ;  q3: 35468 > 65536/2, so as s16 it is 35468 - 65536 (a negative
    ;      number); adding x restores (x * 35468) >> 16.
    ;  q4: gives x + ((x * 20091) >> 16), i.e. the "minus1" is undone.
    vqadd.s16       q3, q3, q2
    vqadd.s16       q4, q4, q2

    ;d6 - c1:temp1
    ;d7 - d1:temp2
    ;d8 - d1:temp1
    ;d9 - c1:temp2

    vqsub.s16       d10, d6, d9             ; c1 = temp1 - temp2
    vqadd.s16       d11, d7, d8             ; d1 = temp1 + temp2

    vqadd.s16       d2, d12, d11            ; a1 + d1
    vqadd.s16       d3, d13, d10            ; b1 + c1
    vqsub.s16       d4, d13, d10            ; b1 - c1
    vqsub.s16       d5, d12, d11            ; a1 - d1

    ; 4x4 transpose of d2..d5 so the second pass works along the other axis
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vswp            d3, d4                  ; same operand layout as the first pass

    ; ---- second pass (same butterfly as above) ----
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q4, q2, d0[0]

    vqadd.s16       d12, d2, d3             ; a1
    vqsub.s16       d13, d2, d3             ; b1

    vshr.s16        q3, q3, #1
    vshr.s16        q4, q4, #1

    vqadd.s16       q3, q3, q2              ; add x back, see first pass
    vqadd.s16       q4, q4, q2

    vqsub.s16       d10, d6, d9             ; c1
    vqadd.s16       d11, d7, d8             ; d1

    vqadd.s16       d2, d12, d11
    vqadd.s16       d3, d13, d10
    vqsub.s16       d4, d13, d10
    vqsub.s16       d5, d12, d11

    ; rounding shift: (x + 4) >> 3
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    ; transpose back: d2..d5 = output rows 0..3 (q1 = rows 0-1, q2 = rows 2-3)
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; load prediction data: four bytes per row, r1 advances by pitch (r2)
    vld1.32         d6[0], [r1], r2
    vld1.32         d6[1], [r1], r2
    vld1.32         d7[0], [r1], r2
    vld1.32         d7[1], [r1], r2

    ; add prediction and residual (prediction bytes widened to 16 bits)
    vaddw.u8        q1, q1, d6
    vaddw.u8        q2, q2, d7

    ; saturate signed 16-bit sums to unsigned 8-bit pixels
    vqmovun.s16     d1, q1
    vqmovun.s16     d2, q2

    ; store to destination: four bytes per row, r3 advances by stride (r0)
    vst1.32         d1[0], [r3], r0
    vst1.32         d1[1], [r3], r0
    vst1.32         d2[0], [r3], r0
    vst1.32         d2[1], [r3], r0

    vpop            {d8-d13}
    bx              lr

    ENDP
131233d2500723e5594f3e7c70896ffeeef32b9c950ywan
132233d2500723e5594f3e7c70896ffeeef32b9c950ywan;-----------------
133233d2500723e5594f3e7c70896ffeeef32b9c950ywan
; Multiplier table loaded into d0 as four 16-bit lanes:
;   lanes 0-1 = 20091 (0x4e7b), lanes 2-3 = 35468 (0x8a8c)
idct_coeff
    DCD     0x4e7b4e7b                      ; 20091, 20091
    DCD     0x8a8c8a8c                      ; 35468, 35468
138233d2500723e5594f3e7c70896ffeeef32b9c950ywan
139233d2500723e5594f3e7c70896ffeeef32b9c950ywan    END
140