;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_short_idct4x4llm_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;*************************************************************
; NEON version of the 4x4 inverse DCT. Prototype (same argument list as
; vp8_short_idct4x4llm_c):
;
;   void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred,
;                                  int pitch, unsigned char *dst,
;                                  int stride)
;
; Arguments on entry:
;   r0      short         *input    16 coefficients, read as two q registers
;   r1      unsigned char *pred     prediction, 4 bytes read per row
;   r2      int            pitch    byte step between prediction rows
;   r3      unsigned char *dst      destination, 4 bytes written per row
;   [sp]    int            stride   byte step between destination rows
;*************************************************************

; Constants of the C reference implementation:
;   static const int cospi8sqrt2minus1 = 20091;
;   static const int sinpi8sqrt2       = 35468;
;   static const int rounding          = 0;

; Optimization note: the data produced by dequantization is signed 13-bit
; data in the range [-4096, 4095]. This makes it safe to use the NEON
; "vqdmulh" instruction, because the product cannot go out of range
; (13+16+1 = 30 bits < 32 bits). The instruction returns the high half of
; the multiplication result, which is what the IDCT needs.
;-------------------------------------------------------------
; vp8_short_idct4x4llm_neon
;
; Runs two passes of the 4-point inverse transform over the 4x4 block of
; 16-bit coefficients at r0 (with a 4x4 transpose between the passes),
; rounds the result with a shift by 3, adds the 4x4 block of prediction
; bytes read from r1 (row step r2) and stores the unsigned-saturated
; bytes to r3 (row step taken from [sp]).
;
; Core registers written: r0 (reloaded with stride), r1, r3 (post-
;   incremented), r12 (address of idct_coeff).
; NEON registers written: d0-d7 and d16-d21 only. d8-d15 (q4-q7) are
;   callee-saved under the AAPCS, so all temporaries are kept out of that
;   range and no vpush/vpop is required. Nothing is pushed, which also
;   keeps the [sp] read of the fifth argument valid.
;-------------------------------------------------------------
|vp8_short_idct4x4llm_neon| PROC
    adr             r12, idct_coeff
    vld1.16         {q1, q2}, [r0]          ; d2..d5 = ip[0..3], ip[4..7], ip[8..11], ip[12..15]
    vld1.16         {d0}, [r12]             ; d0 = four 16-bit constants from idct_coeff

    vswp            d3, d4                  ; q1 = (ip[0], ip[8]), q2 = (ip[4], ip[12])
    ldr             r0, [sp]                ; r0 = stride (input pointer no longer needed)

    ; ---- first pass ----
    vqdmulh.s16     q3, q2, d0[2]           ; q2 * d0[2] (sinpi8sqrt2 lane), doubled high half
    vqdmulh.s16     q8, q2, d0[0]           ; q2 * d0[0] (cospi8sqrt2minus1 lane), doubled high half

    vqadd.s16       d20, d2, d3             ; a1 = ip[0] + ip[8]
    vqsub.s16       d21, d2, d3             ; b1 = ip[0] - ip[8]

    vshr.s16        q3, q3, #1              ; undo the doubling done by vqdmulh
    vshr.s16        q8, q8, #1

    ; 35468 does not fit in a signed 16-bit lane (it is > 65536/2), so the
    ; lane holds a negative number; adding the multiplicand back corrects
    ; the product.
    vqadd.s16       q3, q3, q2
    ; For the other constant the added multiplicand supplies the "+ x" term
    ; of x + ((x * cospi8sqrt2minus1) >> 16).
    vqadd.s16       q8, q8, q2

    ; d6  - c1:temp1
    ; d7  - d1:temp2
    ; d16 - d1:temp1
    ; d17 - c1:temp2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19            ; a1 + d1
    vqadd.s16       d3, d21, d18            ; b1 + c1
    vqsub.s16       d4, d21, d18            ; b1 - c1
    vqsub.s16       d5, d20, d19            ; a1 - d1

    ; 4x4 transpose of the 16-bit values held in d2..d5
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vswp            d3, d4                  ; same q1/q2 pairing as in the first pass

    ; ---- second pass (same butterfly on the transposed data) ----
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ; a1
    vqsub.s16       d21, d2, d3             ; b1

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2              ; same correction as in the first pass
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19            ; a1 + d1
    vqadd.s16       d3, d21, d18            ; b1 + c1
    vqsub.s16       d4, d21, d18            ; b1 - c1
    vqsub.s16       d5, d20, d19            ; a1 - d1

    ; rounding shift right by 3 on every result
    vrshr.s16       d2, d2, #3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    ; transpose back
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; load prediction data: 4 bytes per row, r1 advances by pitch each time
    vld1.32         d6[0], [r1], r2
    vld1.32         d6[1], [r1], r2
    vld1.32         d7[0], [r1], r2
    vld1.32         d7[1], [r1], r2

    ; add prediction and residual (prediction bytes widened to 16 bits)
    vaddw.u8        q1, q1, d6
    vaddw.u8        q2, q2, d7

    ; narrow to bytes with unsigned saturation
    vqmovun.s16     d1, q1
    vqmovun.s16     d2, q2

    ; store to destination: 4 bytes per row, r3 advances by stride each time
    vst1.32         d1[0], [r3], r0
    vst1.32         d1[1], [r3], r0
    vst1.32         d2[0], [r3], r0
    vst1.32         d2[1], [r3], r0

    bx              lr

    ENDP

;-----------------
; IDCT multiplier table, read as four 16-bit lanes:
;   lanes 0-1 = 0x4e7b = 20091
;   lanes 2-3 = 0x8a8c = 35468
idct_coeff
    DCW     0x4e7b, 0x4e7b, 0x8a8c, 0x8a8c

    END