17ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian/* 27ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 37ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian * 47ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 57ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 67ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 77ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 87ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 97ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian */ 107ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 117ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include <arm_neon.h> 127ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vp8/encoder/block.h" 137ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 147ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanianstatic const uint16_t inv_zig_zag[16] = { 157ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 1, 2, 6, 7, 167ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3, 5, 8, 13, 177ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4, 9, 12, 14, 187ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 10, 11, 15, 16 197ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian}; 207ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 217ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanianvoid vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) { 227ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian const int16x8_t one_q = vdupq_n_s16(-1), 237ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian z0 = vld1q_s16(b->coeff), 247ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian z1 = vld1q_s16(b->coeff + 8), 257ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian round0 = vld1q_s16(b->round), 267ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian round1 = vld1q_s16(b->round + 8), 277ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian quant0 = vld1q_s16(b->quant_fast), 287ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian quant1 = vld1q_s16(b->quant_fast + 8), 297ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian dequant0 = vld1q_s16(d->dequant), 307ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian dequant1 = vld1q_s16(d->dequant + 8); 317ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian const uint16x8_t zig_zag0 = vld1q_u16(inv_zig_zag), 327ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian zig_zag1 = vld1q_u16(inv_zig_zag + 8); 337ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian int16x8_t x0, x1, sz0, sz1, y0, y1; 347ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian uint16x8_t eob0, eob1; 357ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian uint16x4_t eob_d16; 367ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian uint32x2_t eob_d32; 377ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian uint32x4_t eob_q32; 387ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 397ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* sign of z: z >> 15 */ 407ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian sz0 = vshrq_n_s16(z0, 15); 417ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian sz1 = vshrq_n_s16(z1, 15); 427ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 437ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* x = abs(z) */ 447ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian x0 = vabsq_s16(z0); 457ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian x1 = vabsq_s16(z1); 467ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 477ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* x += round */ 487ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian x0 = vaddq_s16(x0, round0); 497ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian x1 = vaddq_s16(x1, round1); 507ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 517ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* y = 2 * (x * quant) >> 16 */ 527ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian y0 = vqdmulhq_s16(x0, quant0); 537ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian y1 = vqdmulhq_s16(x1, quant1); 547ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 557ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* Compensate for doubling in vqdmulhq */ 567ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian y0 = vshrq_n_s16(y0, 1); 577ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian y1 = vshrq_n_s16(y1, 1); 587ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 597ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* Restore sign bit */ 607ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian y0 = veorq_s16(y0, sz0); 617ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian y1 = veorq_s16(y1, sz1); 627ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian x0 = vsubq_s16(y0, sz0); 637ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian x1 = vsubq_s16(y1, sz1); 647ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 657ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* find non-zero elements */ 667ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob0 = vtstq_s16(x0, one_q); 677ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob1 = vtstq_s16(x1, one_q); 687ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 697ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* mask zig zag */ 707ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob0 = vandq_u16(eob0, zig_zag0); 717ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob1 = vandq_u16(eob1, zig_zag1); 727ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 737ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* select the largest value */ 747ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob0 = vmaxq_u16(eob0, eob1); 757ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0)); 767ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob_q32 = vmovl_u16(eob_d16); 777ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob_d32 = vmax_u32(vget_low_u32(eob_q32), vget_high_u32(eob_q32)); 787ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian eob_d32 = vpmax_u32(eob_d32, eob_d32); 797ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 807ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* qcoeff = x */ 817ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vst1q_s16(d->qcoeff, x0); 827ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vst1q_s16(d->qcoeff + 8, x1); 837ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 847ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian /* dqcoeff = x * dequant */ 857ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vst1q_s16(d->dqcoeff, vmulq_s16(dequant0, x0)); 867ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vst1q_s16(d->dqcoeff + 8, vmulq_s16(dequant1, x1)); 877ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 887ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0); 897ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian} 90