1da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian/*
2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian *
4da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian *  Use of this source code is governed by a BSD-style license
5da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian *  that can be found in the LICENSE file in the root of the source
6da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian *  tree. An additional intellectual property rights grant can be found
7da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian *  in the file PATENTS.  All contributing project authors may
8da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian *  be found in the AUTHORS file in the root of the source tree.
9da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian */
10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <arm_neon.h>
12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
137bc9febe8749e98a3812a0dc4380ceae75c29450Johann#include "./vpx_dsp_rtcd.h"
14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/inv_txfm.h"
15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
167bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE uint8x8_t create_dcd(const int16_t dc) {
177bc9febe8749e98a3812a0dc4380ceae75c29450Johann  int16x8_t t = vdupq_n_s16(dc);
187bc9febe8749e98a3812a0dc4380ceae75c29450Johann  return vqmovun_s16(t);
197bc9febe8749e98a3812a0dc4380ceae75c29450Johann}
20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
217bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE void idct8x8_1_add_pos_kernel(uint8_t **dest, const int stride,
227bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                            const uint8x8_t res) {
237bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const uint8x8_t a = vld1_u8(*dest);
247bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const uint8x8_t b = vqadd_u8(a, res);
257bc9febe8749e98a3812a0dc4380ceae75c29450Johann  vst1_u8(*dest, b);
267bc9febe8749e98a3812a0dc4380ceae75c29450Johann  *dest += stride;
277bc9febe8749e98a3812a0dc4380ceae75c29450Johann}
28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
297bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic INLINE void idct8x8_1_add_neg_kernel(uint8_t **dest, const int stride,
307bc9febe8749e98a3812a0dc4380ceae75c29450Johann                                            const uint8x8_t res) {
317bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const uint8x8_t a = vld1_u8(*dest);
327bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const uint8x8_t b = vqsub_u8(a, res);
337bc9febe8749e98a3812a0dc4380ceae75c29450Johann  vst1_u8(*dest, b);
347bc9febe8749e98a3812a0dc4380ceae75c29450Johann  *dest += stride;
357bc9febe8749e98a3812a0dc4380ceae75c29450Johann}
36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
377bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest,
387bc9febe8749e98a3812a0dc4380ceae75c29450Johann                            int stride) {
39df37111358d02836cb29bbcb9c6e4c95dff90a16Johann  const int16_t out0 =
40df37111358d02836cb29bbcb9c6e4c95dff90a16Johann      WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64));
417bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64));
427bc9febe8749e98a3812a0dc4380ceae75c29450Johann  const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5);
43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
447bc9febe8749e98a3812a0dc4380ceae75c29450Johann  if (a1 >= 0) {
457bc9febe8749e98a3812a0dc4380ceae75c29450Johann    const uint8x8_t dc = create_dcd(a1);
467bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_pos_kernel(&dest, stride, dc);
477bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_pos_kernel(&dest, stride, dc);
487bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_pos_kernel(&dest, stride, dc);
497bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_pos_kernel(&dest, stride, dc);
507bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_pos_kernel(&dest, stride, dc);
517bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_pos_kernel(&dest, stride, dc);
527bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_pos_kernel(&dest, stride, dc);
537bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_pos_kernel(&dest, stride, dc);
547bc9febe8749e98a3812a0dc4380ceae75c29450Johann  } else {
557bc9febe8749e98a3812a0dc4380ceae75c29450Johann    const uint8x8_t dc = create_dcd(-a1);
567bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_neg_kernel(&dest, stride, dc);
577bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_neg_kernel(&dest, stride, dc);
587bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_neg_kernel(&dest, stride, dc);
597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_neg_kernel(&dest, stride, dc);
607bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_neg_kernel(&dest, stride, dc);
617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_neg_kernel(&dest, stride, dc);
627bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_neg_kernel(&dest, stride, dc);
637bc9febe8749e98a3812a0dc4380ceae75c29450Johann    idct8x8_1_add_neg_kernel(&dest, stride, dc);
647bc9febe8749e98a3812a0dc4380ceae75c29450Johann  }
65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
66