;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_short_idct4x4llm_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;*************************************************************
;void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch,
;                               unsigned char *dst, int stride)
;
; 4x4 inverse DCT of the 16 coefficients at input, added to the 4x4 block of
; prediction bytes at pred and stored, saturated to 8 bits, at dst.
;
; r0    short *input            16 coefficients (32 bytes), read once
; r1    unsigned char *pred     4 rows of 4 bytes, rows are pitch bytes apart
; r2    int pitch
; r3    unsigned char *dst      4 rows of 4 bytes, rows are stride bytes apart
; [sp]  int stride
;
; Registers written: r0, r1, r3, r12, q0-q3, q8-q10.
; Only caller-saved NEON registers are used as temporaries; d8-d15 are
; callee-saved under the AAPCS and are deliberately left untouched.
;*************************************************************

; static const int cospi8sqrt2minus1=20091;
; static const int sinpi8sqrt2      =35468;
; static const int rounding = 0;

; Optimization note: The data produced by dequantization are signed
; 13-bit values in the range [-4096, 4095]. This allows the use of the
; "vqdmulh" (NEON) instruction since it won't go out of range
; (13+16+1=30bits<32bits). This instruction gives the high half
; result of the multiplication that is needed in IDCT.

|vp8_short_idct4x4llm_neon| PROC
    adr             r12, idct_coeff
    vld1.16         {q1, q2}, [r0]          ;d2..d5 = ip[0..3], [4..7], [8..11], [12..15]
    vld1.16         {d0}, [r12]             ;d0[0..1] = 20091, d0[2..3] = 35468 (as s16)

    vswp            d3, d4                  ;q1(ip[0] ip[8]), q2(ip[4] ip[12])
    ldr             r0, [sp]                ;stride (r0 is free once input is loaded)

    ; ---- first pass ----
    vqdmulh.s16     q3, q2, d0[2]           ;2 * x * sinpi8sqrt2, high half
    vqdmulh.s16     q8, q2, d0[0]           ;2 * x * cospi8sqrt2minus1, high half

    vqadd.s16       d20, d2, d3             ;a1
    vqsub.s16       d21, d2, d3             ;b1

    vshr.s16        q3, q3, #1              ;undo the doubling done by vqdmulh
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2              ;sinpi8sqrt2 > 65536/2, so as s16 it is
                                            ;35468 - 65536 (negative): add x back
    vqadd.s16       q8, q8, q2              ;the constant is cos - 1: add x back

    ;d6  - c1:temp1
    ;d7  - d1:temp2
    ;d16 - d1:temp1
    ;d17 - c1:temp2

    vqsub.s16       d18, d6, d17            ;c1
    vqadd.s16       d19, d7, d16            ;d1

    vqadd.s16       d2, d20, d19            ;a1 + d1
    vqadd.s16       d3, d21, d18            ;b1 + c1
    vqsub.s16       d4, d21, d18            ;b1 - c1
    vqsub.s16       d5, d20, d19            ;a1 - d1

    ; transpose the 4x4 block held in d2..d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vswp            d3, d4                  ;same q1/q2 pairing as the first pass

    ; ---- second pass ----
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ;a1
    vqsub.s16       d21, d2, d3             ;b1

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2              ;same correction as in the first pass
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ;c1
    vqadd.s16       d19, d7, d16            ;d1

    vqadd.s16       d2, d20, d19
    vqadd.s16       d3, d21, d18
    vqsub.s16       d4, d21, d18
    vqsub.s16       d5, d20, d19

    ; rounding shift right by 3
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    ; transpose the 4x4 block held in d2..d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; load prediction data: four rows of 4 bytes, pitch bytes apart
    vld1.32         d6[0], [r1], r2
    vld1.32         d6[1], [r1], r2
    vld1.32         d7[0], [r1], r2
    vld1.32         d7[1], [r1], r2

    ; add prediction and residual
    vaddw.u8        q1, q1, d6
    vaddw.u8        q2, q2, d7

    ; saturate the 16-bit sums to unsigned 8-bit
    vqmovun.s16     d1, q1
    vqmovun.s16     d2, q2

    ; store to destination: four rows of 4 bytes, stride bytes apart
    vst1.32         d1[0], [r3], r0
    vst1.32         d1[1], [r3], r0
    vst1.32         d2[0], [r3], r0
    vst1.32         d2[1], [r3], r0

    bx              lr

    ENDP

;-----------------

idct_coeff
    DCD     0x4e7b4e7b, 0x8a8c8a8c

;20091, 20091, 35468, 35468

    END