/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
100a39d0a697ff3603e8c100300fda363658e10b23James Zern
110a39d0a697ff3603e8c100300fda363658e10b23James Zern#ifndef VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_
120a39d0a697ff3603e8c100300fda363658e10b23James Zern#define VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_
130a39d0a697ff3603e8c100300fda363658e10b23James Zern
140a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "./vpx_config.h"
150a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "vpx/vpx_integer.h"
160a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "vpx_dsp/vpx_dsp_common.h"
170a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "vpx_dsp/ppc/types_vsx.h"
180a39d0a697ff3603e8c100300fda363658e10b23James Zern
190a39d0a697ff3603e8c100300fda363658e10b23James Zern// Load 8 16 bit values. If the source is 32 bits then pack down with
200a39d0a697ff3603e8c100300fda363658e10b23James Zern// saturation.
210a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE int16x8_t load_tran_low(int32_t c, const tran_low_t *s) {
220a39d0a697ff3603e8c100300fda363658e10b23James Zern#if CONFIG_VP9_HIGHBITDEPTH
230a39d0a697ff3603e8c100300fda363658e10b23James Zern  int32x4_t u = vec_vsx_ld(c, s);
240a39d0a697ff3603e8c100300fda363658e10b23James Zern  int32x4_t v = vec_vsx_ld(c, s + 4);
250a39d0a697ff3603e8c100300fda363658e10b23James Zern  return vec_packs(u, v);
260a39d0a697ff3603e8c100300fda363658e10b23James Zern#else
270a39d0a697ff3603e8c100300fda363658e10b23James Zern  return vec_vsx_ld(c, s);
280a39d0a697ff3603e8c100300fda363658e10b23James Zern#endif
290a39d0a697ff3603e8c100300fda363658e10b23James Zern}
300a39d0a697ff3603e8c100300fda363658e10b23James Zern
310a39d0a697ff3603e8c100300fda363658e10b23James Zern// Store 8 16 bit values. If the destination is 32 bits then sign extend the
320a39d0a697ff3603e8c100300fda363658e10b23James Zern// values by multiplying by 1.
330a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void store_tran_low(int16x8_t v, int32_t c, tran_low_t *s) {
340a39d0a697ff3603e8c100300fda363658e10b23James Zern#if CONFIG_VP9_HIGHBITDEPTH
350a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t one = vec_splat_s16(1);
360a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t even = vec_mule(v, one);
370a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t odd = vec_mulo(v, one);
380a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t high = vec_mergeh(even, odd);
390a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t low = vec_mergel(even, odd);
400a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(high, c, s);
410a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(low, c, s + 4);
420a39d0a697ff3603e8c100300fda363658e10b23James Zern#else
430a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v, c, s);
440a39d0a697ff3603e8c100300fda363658e10b23James Zern#endif
450a39d0a697ff3603e8c100300fda363658e10b23James Zern}
460a39d0a697ff3603e8c100300fda363658e10b23James Zern
470a39d0a697ff3603e8c100300fda363658e10b23James Zern#endif  // VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_
48