1da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian/* 2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * 4da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian */ 10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <arm_neon.h> 12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 137bc9febe8749e98a3812a0dc4380ceae75c29450Johann#include "./vpx_dsp_rtcd.h" 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/inv_txfm.h" 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 167bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE uint8x8_t create_dcd(const int16_t dc) { 177bc9febe8749e98a3812a0dc4380ceae75c29450Johann int16x8_t t = vdupq_n_s16(dc); 187bc9febe8749e98a3812a0dc4380ceae75c29450Johann return vqmovun_s16(t); 197bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 217bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE void idct8x8_1_add_pos_kernel(uint8_t **dest, const int stride, 227bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t res) { 237bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t a = vld1_u8(*dest); 247bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t b = vqadd_u8(a, res); 257bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(*dest, b); 267bc9febe8749e98a3812a0dc4380ceae75c29450Johann *dest += stride; 277bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 297bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE void idct8x8_1_add_neg_kernel(uint8_t **dest, const int stride, 307bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t res) { 317bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t a = vld1_u8(*dest); 327bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t b = vqsub_u8(a, res); 337bc9febe8749e98a3812a0dc4380ceae75c29450Johann vst1_u8(*dest, b); 347bc9febe8749e98a3812a0dc4380ceae75c29450Johann *dest += stride; 357bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 377bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, 387bc9febe8749e98a3812a0dc4380ceae75c29450Johann int stride) { 39df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const int16_t out0 = 40df37111358d02836cb29bbcb9c6e4c95dff90a16Johann WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); 417bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64)); 427bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5); 43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 447bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (a1 >= 0) { 457bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t dc = create_dcd(a1); 467bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_pos_kernel(&dest, stride, dc); 477bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_pos_kernel(&dest, stride, dc); 487bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_pos_kernel(&dest, stride, dc); 497bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_pos_kernel(&dest, stride, dc); 507bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_pos_kernel(&dest, stride, dc); 517bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_pos_kernel(&dest, stride, dc); 527bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_pos_kernel(&dest, stride, dc); 537bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_pos_kernel(&dest, stride, dc); 547bc9febe8749e98a3812a0dc4380ceae75c29450Johann } else { 557bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8x8_t dc = create_dcd(-a1); 567bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_neg_kernel(&dest, stride, dc); 577bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_neg_kernel(&dest, stride, dc); 587bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_neg_kernel(&dest, stride, dc); 597bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_neg_kernel(&dest, stride, dc); 607bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_neg_kernel(&dest, stride, dc); 617bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_neg_kernel(&dest, stride, dc); 627bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_neg_kernel(&dest, stride, dc); 637bc9febe8749e98a3812a0dc4380ceae75c29450Johann idct8x8_1_add_neg_kernel(&dest, stride, dc); 647bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 66