;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_short_idct4x4llm_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;*************************************************************
;void vp8_short_idct4x4llm_neon(short *input, short *output, int pitch)
;r0     short * input   16 coefficients, read as one contiguous 32-byte block
;r1     short * output  first of four output rows (4 shorts are stored per row)
;r2     int pitch       distance between output rows; it is added directly to
;                       the row address, i.e. it is a byte count
;*************************************************************
;static const int cospi8sqrt2minus1=20091;
;static const int sinpi8sqrt2      =35468;
;static const int rounding = 0;
;Optimization note: The resulted data from dequantization are signed 13-bit data that is
;in the range of [-4096, 4095]. This allows to use "vqdmulh"(neon) instruction since
;it won't go out of range (13+16+1=30bits<32bits). This instruction gives the high half
;result of the multiplication that is needed in IDCT.
;
;Register usage: only r0-r3, r12, q0-q3 (d0-d7) and q8-q10 (d16-d21) are
;written. d8-d15 are callee-saved under the ARM procedure call standard, so
;all temporaries are kept out of them and nothing has to be spilled.

|vp8_short_idct4x4llm_neon| PROC
    adr             r12, idct_coeff
    vld1.16         {q1, q2}, [r0]          ;d2=ip[0..3] d3=ip[4..7] d4=ip[8..11] d5=ip[12..15]
    vld1.16         {d0}, [r12]             ;d0[0]=20091 (cos-1 term), d0[2]=35468 (sin term)

    ;---- first pass: the four rows are combined element-wise ----
    vswp            d3, d4                  ;q1(vp[0] vp[8]), q2(vp[4] vp[12])

    vqdmulh.s16     q3, q2, d0[2]           ;(2 * x * sinpi8sqrt2) >> 16
    vqdmulh.s16     q8, q2, d0[0]           ;(2 * x * cospi8sqrt2minus1) >> 16

    vqadd.s16       d20, d2, d3             ;a1
    vqsub.s16       d21, d2, d3             ;b1

    vshr.s16        q3, q3, #1              ;undo the doubling done by vqdmulh
    vshr.s16        q8, q8, #1

    ;0x8a8c (35468) is > 65536/2, so as a signed 16-bit scalar it is
    ;35468 - 65536 (a negative number); adding x back corrects the product.
    vqadd.s16       q3, q3, q2
    ;the constant is cos - 1, so adding x back yields x * cos.
    vqadd.s16       q8, q8, q2

    ;d6  - c1:temp1
    ;d7  - d1:temp2
    ;d16 - d1:temp1
    ;d17 - c1:temp2

    vqsub.s16       d18, d6, d17            ;c1
    vqadd.s16       d19, d7, d16            ;d1

    vqadd.s16       d2, d20, d19            ;a1 + d1
    vqadd.s16       d3, d21, d18            ;b1 + c1
    vqsub.s16       d4, d21, d18            ;b1 - c1
    vqsub.s16       d5, d20, d19            ;a1 - d1

    ;transpose the 4x4 block held in d2-d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ;---- second pass: same butterfly on the transposed data ----
    vswp            d3, d4

    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ;a1
    vqsub.s16       d21, d2, d3             ;b1

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2              ;same corrections as in the first pass
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ;c1
    vqadd.s16       d19, d7, d16            ;d1

    vqadd.s16       d2, d20, d19
    vqadd.s16       d3, d21, d18
    vqsub.s16       d4, d21, d18
    vqsub.s16       d5, d20, d19

    ;rounding shift: (x + 4) >> 3
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    ;addresses of output rows 1..3 (row 0 is r1 itself)
    add             r3, r1, r2
    add             r12, r3, r2
    add             r0, r12, r2

    ;transpose back so each d register again holds one output row
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vst1.16         {d2}, [r1]
    vst1.16         {d3}, [r3]
    vst1.16         {d4}, [r12]
    vst1.16         {d5}, [r0]

    bx              lr

    ENDP

;-----------------
;Multiplier table loaded into d0 by the routine above.

idct_coeff
    DCD     0x4e7b4e7b, 0x8a8c8a8c

;20091, 20091, 35468, 35468

    END