190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
; Assembler set-up for this file.
    EXPORT  |vp8_short_idct4x4llm_neon|     ; make the routine visible to the linker
    ARM                                     ; assemble as ARM (not Thumb) instructions
    REQUIRE8                                ; code requires 8-byte stack alignment
    PRESERVE8                               ; code preserves 8-byte stack alignment

    AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code area, 2^2 = 4-byte aligned
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;*************************************************************
;void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch,
;                               unsigned char *dst, int stride)
;r0   short *input
;r1   unsigned char *pred
;r2   int pitch
;r3   unsigned char *dst
;[sp] int stride
;*************************************************************
281b362b15af34006e6a11974088a46d42b903418eJohann
291b362b15af34006e6a11974088a46d42b903418eJohann; static const int cospi8sqrt2minus1=20091;
301b362b15af34006e6a11974088a46d42b903418eJohann; static const int sinpi8sqrt2      =35468;
311b362b15af34006e6a11974088a46d42b903418eJohann; static const int rounding = 0;
321b362b15af34006e6a11974088a46d42b903418eJohann
; Optimization note: the data produced by dequantization is signed 13-bit,
; i.e. in the range [-4096, 4095]. This allows the use of the NEON "vqdmulh"
; instruction, because the doubled product cannot go out of range
; (13+16+1=30 bits < 32 bits). The instruction returns the high half of
; the multiplication result, which is what the IDCT needs.
3890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
; vp8_short_idct4x4llm_neon
;
; Two-pass 4x4 inverse DCT of the 16 coefficients at r0. The result is
; rounded (>> 3), added to a 4x4 block of prediction bytes, saturated to
; unsigned 8 bits and written to the destination.
;
;   r0    short *input          16 coefficients (r0 is reused for stride)
;   r1    unsigned char *pred   prediction rows, advanced by r2 per row
;   r2    int pitch
;   r3    unsigned char *dst    output rows, advanced by stride per row
;   [sp]  int stride
;
; Clobbers r0, r1, r3, r12, q0-q3 and q8-q10.
; Temporaries live in q8-q10 (d16-d21) on purpose: d8-d15 (q4-q7) are
; callee-saved under the AAPCS, so they are left untouched and nothing
; has to be pushed (which also keeps the [sp] offset of stride at 0).
|vp8_short_idct4x4llm_neon| PROC
    adr             r12, idct_coeff
    vld1.16         {q1, q2}, [r0]          ; d2..d5 = the 16 input coefficients
    vld1.16         {d0}, [r12]             ; d0[0] = 20091, d0[2] = 35468

    vswp            d3, d4                  ; q1 = (ip[0], ip[8]), q2 = (ip[4], ip[12])
    ldr             r0, [sp]                ; stride (input pointer no longer needed)

    ; ---------------- first pass ----------------
    vqdmulh.s16     q3, q2, d0[2]           ; (2 * x * sinpi8sqrt2) >> 16
    vqdmulh.s16     q8, q2, d0[0]           ; (2 * x * cospi8sqrt2minus1) >> 16

    vqadd.s16       d20, d2, d3             ; a1 = ip[0] + ip[8]
    vqsub.s16       d21, d2, d3             ; b1 = ip[0] - ip[8]

    vshr.s16        q3, q3, #1              ; undo the doubling done by vqdmulh
    vshr.s16        q8, q8, #1

    ; 35468 does not fit in s16 and is seen as the negative 35468 - 65536,
    ; so adding x back gives (x * 35468) >> 16.
    vqadd.s16       q3, q3, q2
    ; x + ((x * 20091) >> 16)
    vqadd.s16       q8, q8, q2

    ; d6  - c1:temp1
    ; d7  - d1:temp2
    ; d16 - d1:temp1
    ; d17 - c1:temp2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19            ; a1 + d1
    vqadd.s16       d3, d21, d18            ; b1 + c1
    vqsub.s16       d4, d21, d18            ; b1 - c1
    vqsub.s16       d5, d20, d19            ; a1 - d1

    ; transpose the 4x4 block held in d2..d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vswp            d3, d4                  ; same register layout as before pass one

    ; ---------------- second pass ----------------
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ; a1
    vqsub.s16       d21, d2, d3             ; b1

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2              ; same s16 correction as in pass one
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19
    vqadd.s16       d3, d21, d18
    vqsub.s16       d4, d21, d18
    vqsub.s16       d5, d20, d19

    ; rounding shift: (x + 4) >> 3
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    ; transpose back; no vswp this time, so q1 = (d2, d3) and q2 = (d4, d5)
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; load prediction data: four rows of 4 bytes, r1 advanced by pitch each time
    vld1.32         d6[0], [r1], r2
    vld1.32         d6[1], [r1], r2
    vld1.32         d7[0], [r1], r2
    vld1.32         d7[1], [r1], r2

    ; add prediction and residual (prediction bytes widened to 16 bits)
    vaddw.u8        q1, q1, d6
    vaddw.u8        q2, q2, d7

    ; saturate to unsigned 8 bits; d2 may be overwritten, q1 is already consumed
    vqmovun.s16     d1, q1
    vqmovun.s16     d2, q2

    ; store to destination: four rows of 4 bytes, r3 advanced by stride each time
    vst1.32         d1[0], [r3], r0
    vst1.32         d1[1], [r3], r0
    vst1.32         d2[0], [r3], r0
    vst1.32         d2[1], [r3], r0

    bx              lr

    ENDP
13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;-----------------
13379f15823c34ae1e423108295e416213200bb280fAndreas Huber
; Multiplier table for the vqdmulh.s16 by-scalar operations; the routine
; loads it into d0 as four 16-bit lanes (each word holds two equal halves):
;   d0[0], d0[1] = 0x4e7b = 20091   (cospi8sqrt2minus1)
;   d0[2], d0[3] = 0x8a8c = 35468   (sinpi8sqrt2; negative when read as s16)
idct_coeff
    DCD     0x4e7b4e7b, 0x8a8c8a8c

    END
140