190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    EXPORT  |vp8_short_idct4x4llm_neon|
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ARM
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    REQUIRE8
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    PRESERVE8
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    AREA ||.text||, CODE, READONLY, ALIGN=2
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;*************************************************************
2090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;r0 short * input
2290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;r1 short * output
2390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;r2 int pitch
2490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;*************************************************************
2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;static const int cospi8sqrt2minus1=20091;
2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;static const int sinpi8sqrt2      =35468;
2790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;static const int rounding = 0;
;Optimization note: the data produced by dequantization is signed 13-bit, i.e. it lies
;in the range [-4096, 4095]. This allows the NEON "vqdmulh" instruction to be used, since
;the result cannot go out of range (13+16+1=30 bits < 32 bits). This instruction gives the
;high half of the multiplication result, which is what the IDCT needs.
3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
; vp8_short_idct4x4llm_neon
;
; 4x4 inverse DCT. Loads 16 signed 16-bit coefficients from r0, runs the same
; 1-D butterfly twice with a 4x4 transpose after each run, rounds the second
; result with (x + 4) >> 3 and stores four lines of four 16-bit values.
;
; In:       r0  = input  (16 contiguous 16-bit values)
;           r1  = output (first line)
;           r2  = pitch; added unscaled to the output address to reach the next line
; Clobbers: r0, r3, r12, q0-q3, q8-q10
;
; All temporaries live in q0-q3 and q8-q10. q4-q7 (d8-d15) are callee-saved
; under the AAPCS and this routine neither saves nor restores NEON state, so
; they must not be used as scratch here.
|vp8_short_idct4x4llm_neon| PROC
    adr             r12, idct_coeff
    vld1.16         {q1, q2}, [r0]          ;d2..d5 = in[0..3], in[4..7], in[8..11], in[12..15]
    vld1.16         {d0}, [r12]             ;d0[0] = 0x4e7b (20091), d0[2] = 0x8a8c (35468)

    ;---------------- first 1-D pass ----------------
    vswp            d3, d4                  ;q1 = (in[0..3], in[8..11]), q2 = (in[4..7], in[12..15])

    vqdmulh.s16     q3, q2, d0[2]           ;high half of 2 * q2 * 0x8a8c
    vqdmulh.s16     q8, q2, d0[0]           ;high half of 2 * q2 * 0x4e7b

    vqadd.s16       d20, d2, d3             ;a1
    vqsub.s16       d21, d2, d3             ;b1

    vshr.s16        q3, q3, #1              ;undo the doubling done by vqdmulh
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2              ;0x8a8c is negative as s16 (35468 - 65536): add x back
    vqadd.s16       q8, q8, q2              ;x + ((x * 20091) >> 16)

    ;d6  - c1:temp1
    ;d7  - d1:temp2
    ;d16 - d1:temp1
    ;d17 - c1:temp2

    vqsub.s16       d18, d6, d17            ;c1
    vqadd.s16       d19, d7, d16            ;d1

    vqadd.s16       d2, d20, d19            ;a1 + d1
    vqadd.s16       d3, d21, d18            ;b1 + c1
    vqsub.s16       d4, d21, d18            ;b1 - c1
    vqsub.s16       d5, d20, d19            ;a1 - d1

    ;4x4 transpose of d2..d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ;---------------- second 1-D pass ----------------
    vswp            d3, d4                  ;same layout as before: q1 = lines 0 and 2, q2 = lines 1 and 3

    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ;a1
    vqsub.s16       d21, d2, d3             ;b1

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2              ;0x8a8c is negative as s16 (35468 - 65536): add x back
    vqadd.s16       q8, q8, q2              ;x + ((x * 20091) >> 16)

    vqsub.s16       d18, d6, d17            ;c1
    vqadd.s16       d19, d7, d16            ;d1

    vqadd.s16       d2, d20, d19            ;a1 + d1
    vqadd.s16       d3, d21, d18            ;b1 + c1
    vqsub.s16       d4, d21, d18            ;b1 - c1
    vqsub.s16       d5, d20, d19            ;a1 - d1

    ;rounding shift: (x + 4) >> 3
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    ;addresses of output lines 1..3 (line 0 is r1)
    add             r3, r1, r2
    add             r12, r3, r2
    add             r0, r12, r2

    ;4x4 transpose of d2..d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vst1.16         {d2}, [r1]
    vst1.16         {d3}, [r3]
    vst1.16         {d4}, [r12]
    vst1.16         {d5}, [r0]

    bx             lr

    ENDP
11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;-----------------
11679f15823c34ae1e423108295e416213200bb280fAndreas Huber
; IDCT multipliers, read as four 16-bit lanes by the routine above:
;   lanes 0-1 = 0x4e7b (20091), lanes 2-3 = 0x8a8c (35468)
idct_coeff
    DCD     0x4e7b4e7b
    DCD     0x8a8c8a8c
12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    END
123