/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
100a39d0a697ff3603e8c100300fda363658e10b23James Zern
110a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "./vpx_dsp_rtcd.h"
120a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "vpx_dsp/ppc/types_vsx.h"
130a39d0a697ff3603e8c100300fda363658e10b23James Zern
140a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_v_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
150a39d0a697ff3603e8c100300fda363658e10b23James Zern                               const uint8_t *above, const uint8_t *left) {
160a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t d = vec_vsx_ld(0, above);
170a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
180a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
190a39d0a697ff3603e8c100300fda363658e10b23James Zern
200a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 16; i++, dst += stride) {
210a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(d, 0, dst);
220a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
230a39d0a697ff3603e8c100300fda363658e10b23James Zern}
240a39d0a697ff3603e8c100300fda363658e10b23James Zern
250a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_v_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
260a39d0a697ff3603e8c100300fda363658e10b23James Zern                               const uint8_t *above, const uint8_t *left) {
270a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t d0 = vec_vsx_ld(0, above);
280a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t d1 = vec_vsx_ld(16, above);
290a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
300a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
310a39d0a697ff3603e8c100300fda363658e10b23James Zern
320a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 32; i++, dst += stride) {
330a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(d0, 0, dst);
340a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(d1, 16, dst);
350a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
360a39d0a697ff3603e8c100300fda363658e10b23James Zern}
370a39d0a697ff3603e8c100300fda363658e10b23James Zern
380a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic const uint32x4_t mask4 = { 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
390a39d0a697ff3603e8c100300fda363658e10b23James Zern
400a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_h_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride,
410a39d0a697ff3603e8c100300fda363658e10b23James Zern                             const uint8_t *above, const uint8_t *left) {
420a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t d = vec_vsx_ld(0, left);
430a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v0 = vec_splat(d, 0);
440a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v1 = vec_splat(d, 1);
450a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v2 = vec_splat(d, 2);
460a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v3 = vec_splat(d, 3);
470a39d0a697ff3603e8c100300fda363658e10b23James Zern
480a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)above;
490a39d0a697ff3603e8c100300fda363658e10b23James Zern
500a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_sel(v0, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst);
510a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
520a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_sel(v1, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst);
530a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
540a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_sel(v2, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst);
550a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
560a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_sel(v3, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst);
570a39d0a697ff3603e8c100300fda363658e10b23James Zern}
580a39d0a697ff3603e8c100300fda363658e10b23James Zern
590a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_h_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride,
600a39d0a697ff3603e8c100300fda363658e10b23James Zern                             const uint8_t *above, const uint8_t *left) {
610a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t d = vec_vsx_ld(0, left);
620a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v0 = vec_splat(d, 0);
630a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v1 = vec_splat(d, 1);
640a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v2 = vec_splat(d, 2);
650a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v3 = vec_splat(d, 3);
660a39d0a697ff3603e8c100300fda363658e10b23James Zern
670a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v4 = vec_splat(d, 4);
680a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v5 = vec_splat(d, 5);
690a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v6 = vec_splat(d, 6);
700a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v7 = vec_splat(d, 7);
710a39d0a697ff3603e8c100300fda363658e10b23James Zern
720a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)above;
730a39d0a697ff3603e8c100300fda363658e10b23James Zern
740a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(xxpermdi(v0, vec_vsx_ld(0, dst), 1), 0, dst);
750a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
760a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(xxpermdi(v1, vec_vsx_ld(0, dst), 1), 0, dst);
770a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
780a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(xxpermdi(v2, vec_vsx_ld(0, dst), 1), 0, dst);
790a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
800a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(xxpermdi(v3, vec_vsx_ld(0, dst), 1), 0, dst);
810a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
820a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(xxpermdi(v4, vec_vsx_ld(0, dst), 1), 0, dst);
830a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
840a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(xxpermdi(v5, vec_vsx_ld(0, dst), 1), 0, dst);
850a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
860a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(xxpermdi(v6, vec_vsx_ld(0, dst), 1), 0, dst);
870a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
880a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(xxpermdi(v7, vec_vsx_ld(0, dst), 1), 0, dst);
890a39d0a697ff3603e8c100300fda363658e10b23James Zern}
900a39d0a697ff3603e8c100300fda363658e10b23James Zern
910a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
920a39d0a697ff3603e8c100300fda363658e10b23James Zern                               const uint8_t *above, const uint8_t *left) {
930a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t d = vec_vsx_ld(0, left);
940a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v0 = vec_splat(d, 0);
950a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v1 = vec_splat(d, 1);
960a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v2 = vec_splat(d, 2);
970a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v3 = vec_splat(d, 3);
980a39d0a697ff3603e8c100300fda363658e10b23James Zern
990a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v4 = vec_splat(d, 4);
1000a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v5 = vec_splat(d, 5);
1010a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v6 = vec_splat(d, 6);
1020a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v7 = vec_splat(d, 7);
1030a39d0a697ff3603e8c100300fda363658e10b23James Zern
1040a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v8 = vec_splat(d, 8);
1050a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v9 = vec_splat(d, 9);
1060a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v10 = vec_splat(d, 10);
1070a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v11 = vec_splat(d, 11);
1080a39d0a697ff3603e8c100300fda363658e10b23James Zern
1090a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v12 = vec_splat(d, 12);
1100a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v13 = vec_splat(d, 13);
1110a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v14 = vec_splat(d, 14);
1120a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v15 = vec_splat(d, 15);
1130a39d0a697ff3603e8c100300fda363658e10b23James Zern
1140a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)above;
1150a39d0a697ff3603e8c100300fda363658e10b23James Zern
1160a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v0, 0, dst);
1170a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1180a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v1, 0, dst);
1190a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1200a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v2, 0, dst);
1210a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1220a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v3, 0, dst);
1230a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1240a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v4, 0, dst);
1250a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1260a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v5, 0, dst);
1270a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1280a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v6, 0, dst);
1290a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1300a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v7, 0, dst);
1310a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1320a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v8, 0, dst);
1330a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1340a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v9, 0, dst);
1350a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1360a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v10, 0, dst);
1370a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1380a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v11, 0, dst);
1390a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1400a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v12, 0, dst);
1410a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1420a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v13, 0, dst);
1430a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1440a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v14, 0, dst);
1450a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
1460a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(v15, 0, dst);
1470a39d0a697ff3603e8c100300fda363658e10b23James Zern}
1480a39d0a697ff3603e8c100300fda363658e10b23James Zern
// Stores the 16-byte vector `v` to both halves of the current 32-byte row and
// then advances to the next row. Relies on `dst` (row pointer, modified) and
// `stride` being in scope at the point of use.
//
// The body is wrapped in do { } while (0) so the expansion is exactly one
// statement and stays intact under an unbraced if/else or loop. `v` is
// expanded twice, so pass a plain variable rather than an expression with
// side effects.
#define H_PREDICTOR_32(v)     \
  do {                        \
    vec_vsx_st((v), 0, dst);  \
    vec_vsx_st((v), 16, dst); \
    dst += stride;            \
  } while (0)
1530a39d0a697ff3603e8c100300fda363658e10b23James Zern
1540a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_h_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
1550a39d0a697ff3603e8c100300fda363658e10b23James Zern                               const uint8_t *above, const uint8_t *left) {
1560a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t d0 = vec_vsx_ld(0, left);
1570a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t d1 = vec_vsx_ld(16, left);
1580a39d0a697ff3603e8c100300fda363658e10b23James Zern
1590a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v0_0 = vec_splat(d0, 0);
1600a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v1_0 = vec_splat(d0, 1);
1610a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v2_0 = vec_splat(d0, 2);
1620a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v3_0 = vec_splat(d0, 3);
1630a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v4_0 = vec_splat(d0, 4);
1640a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v5_0 = vec_splat(d0, 5);
1650a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v6_0 = vec_splat(d0, 6);
1660a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v7_0 = vec_splat(d0, 7);
1670a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v8_0 = vec_splat(d0, 8);
1680a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v9_0 = vec_splat(d0, 9);
1690a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v10_0 = vec_splat(d0, 10);
1700a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v11_0 = vec_splat(d0, 11);
1710a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v12_0 = vec_splat(d0, 12);
1720a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v13_0 = vec_splat(d0, 13);
1730a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v14_0 = vec_splat(d0, 14);
1740a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v15_0 = vec_splat(d0, 15);
1750a39d0a697ff3603e8c100300fda363658e10b23James Zern
1760a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v0_1 = vec_splat(d1, 0);
1770a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v1_1 = vec_splat(d1, 1);
1780a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v2_1 = vec_splat(d1, 2);
1790a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v3_1 = vec_splat(d1, 3);
1800a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v4_1 = vec_splat(d1, 4);
1810a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v5_1 = vec_splat(d1, 5);
1820a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v6_1 = vec_splat(d1, 6);
1830a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v7_1 = vec_splat(d1, 7);
1840a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v8_1 = vec_splat(d1, 8);
1850a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v9_1 = vec_splat(d1, 9);
1860a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v10_1 = vec_splat(d1, 10);
1870a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v11_1 = vec_splat(d1, 11);
1880a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v12_1 = vec_splat(d1, 12);
1890a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v13_1 = vec_splat(d1, 13);
1900a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v14_1 = vec_splat(d1, 14);
1910a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v15_1 = vec_splat(d1, 15);
1920a39d0a697ff3603e8c100300fda363658e10b23James Zern
1930a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)above;
1940a39d0a697ff3603e8c100300fda363658e10b23James Zern
1950a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v0_0);
1960a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v1_0);
1970a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v2_0);
1980a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v3_0);
1990a39d0a697ff3603e8c100300fda363658e10b23James Zern
2000a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v4_0);
2010a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v5_0);
2020a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v6_0);
2030a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v7_0);
2040a39d0a697ff3603e8c100300fda363658e10b23James Zern
2050a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v8_0);
2060a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v9_0);
2070a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v10_0);
2080a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v11_0);
2090a39d0a697ff3603e8c100300fda363658e10b23James Zern
2100a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v12_0);
2110a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v13_0);
2120a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v14_0);
2130a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v15_0);
2140a39d0a697ff3603e8c100300fda363658e10b23James Zern
2150a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v0_1);
2160a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v1_1);
2170a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v2_1);
2180a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v3_1);
2190a39d0a697ff3603e8c100300fda363658e10b23James Zern
2200a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v4_1);
2210a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v5_1);
2220a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v6_1);
2230a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v7_1);
2240a39d0a697ff3603e8c100300fda363658e10b23James Zern
2250a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v8_1);
2260a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v9_1);
2270a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v10_1);
2280a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v11_1);
2290a39d0a697ff3603e8c100300fda363658e10b23James Zern
2300a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v12_1);
2310a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v13_1);
2320a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v14_1);
2330a39d0a697ff3603e8c100300fda363658e10b23James Zern  H_PREDICTOR_32(v15_1);
2340a39d0a697ff3603e8c100300fda363658e10b23James Zern}
2350a39d0a697ff3603e8c100300fda363658e10b23James Zern
2360a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_tm_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride,
2370a39d0a697ff3603e8c100300fda363658e10b23James Zern                              const uint8_t *above, const uint8_t *left) {
2380a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0));
2390a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t l = unpack_to_s16_h(vec_vsx_ld(0, left));
2400a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t a = unpack_to_s16_h(vec_vsx_ld(0, above));
2410a39d0a697ff3603e8c100300fda363658e10b23James Zern  int16x8_t tmp, val;
2420a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t d;
2430a39d0a697ff3603e8c100300fda363658e10b23James Zern
2440a39d0a697ff3603e8c100300fda363658e10b23James Zern  d = vec_vsx_ld(0, dst);
2450a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(d);
2460a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 0), a), tl);
2470a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst);
2480a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
2490a39d0a697ff3603e8c100300fda363658e10b23James Zern
2500a39d0a697ff3603e8c100300fda363658e10b23James Zern  d = vec_vsx_ld(0, dst);
2510a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(d);
2520a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 1), a), tl);
2530a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst);
2540a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
2550a39d0a697ff3603e8c100300fda363658e10b23James Zern
2560a39d0a697ff3603e8c100300fda363658e10b23James Zern  d = vec_vsx_ld(0, dst);
2570a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(d);
2580a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 2), a), tl);
2590a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst);
2600a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
2610a39d0a697ff3603e8c100300fda363658e10b23James Zern
2620a39d0a697ff3603e8c100300fda363658e10b23James Zern  d = vec_vsx_ld(0, dst);
2630a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(d);
2640a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 3), a), tl);
2650a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst);
2660a39d0a697ff3603e8c100300fda363658e10b23James Zern}
2670a39d0a697ff3603e8c100300fda363658e10b23James Zern
2680a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_tm_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride,
2690a39d0a697ff3603e8c100300fda363658e10b23James Zern                              const uint8_t *above, const uint8_t *left) {
2700a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0));
2710a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t l = unpack_to_s16_h(vec_vsx_ld(0, left));
2720a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t a = unpack_to_s16_h(vec_vsx_ld(0, above));
2730a39d0a697ff3603e8c100300fda363658e10b23James Zern  int16x8_t tmp, val;
2740a39d0a697ff3603e8c100300fda363658e10b23James Zern
2750a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(vec_vsx_ld(0, dst));
2760a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 0), a), tl);
2770a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(val, tmp), 0, dst);
2780a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
2790a39d0a697ff3603e8c100300fda363658e10b23James Zern
2800a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(vec_vsx_ld(0, dst));
2810a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 1), a), tl);
2820a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(val, tmp), 0, dst);
2830a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
2840a39d0a697ff3603e8c100300fda363658e10b23James Zern
2850a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(vec_vsx_ld(0, dst));
2860a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 2), a), tl);
2870a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(val, tmp), 0, dst);
2880a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
2890a39d0a697ff3603e8c100300fda363658e10b23James Zern
2900a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(vec_vsx_ld(0, dst));
2910a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 3), a), tl);
2920a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(val, tmp), 0, dst);
2930a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
2940a39d0a697ff3603e8c100300fda363658e10b23James Zern
2950a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(vec_vsx_ld(0, dst));
2960a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 4), a), tl);
2970a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(val, tmp), 0, dst);
2980a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
2990a39d0a697ff3603e8c100300fda363658e10b23James Zern
3000a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(vec_vsx_ld(0, dst));
3010a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 5), a), tl);
3020a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(val, tmp), 0, dst);
3030a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3040a39d0a697ff3603e8c100300fda363658e10b23James Zern
3050a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(vec_vsx_ld(0, dst));
3060a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 6), a), tl);
3070a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(val, tmp), 0, dst);
3080a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3090a39d0a697ff3603e8c100300fda363658e10b23James Zern
3100a39d0a697ff3603e8c100300fda363658e10b23James Zern  tmp = unpack_to_s16_l(vec_vsx_ld(0, dst));
3110a39d0a697ff3603e8c100300fda363658e10b23James Zern  val = vec_sub(vec_add(vec_splat(l, 7), a), tl);
3120a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(val, tmp), 0, dst);
3130a39d0a697ff3603e8c100300fda363658e10b23James Zern}
3140a39d0a697ff3603e8c100300fda363658e10b23James Zern
3150a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic void tm_predictor_16x8(uint8_t *dst, const ptrdiff_t stride, int16x8_t l,
3160a39d0a697ff3603e8c100300fda363658e10b23James Zern                              int16x8_t ah, int16x8_t al, int16x8_t tl) {
3170a39d0a697ff3603e8c100300fda363658e10b23James Zern  int16x8_t vh, vl, ls;
3180a39d0a697ff3603e8c100300fda363658e10b23James Zern
3190a39d0a697ff3603e8c100300fda363658e10b23James Zern  ls = vec_splat(l, 0);
3200a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, ah), tl);
3210a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, al), tl);
3220a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3230a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3240a39d0a697ff3603e8c100300fda363658e10b23James Zern
3250a39d0a697ff3603e8c100300fda363658e10b23James Zern  ls = vec_splat(l, 1);
3260a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, ah), tl);
3270a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, al), tl);
3280a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3290a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3300a39d0a697ff3603e8c100300fda363658e10b23James Zern
3310a39d0a697ff3603e8c100300fda363658e10b23James Zern  ls = vec_splat(l, 2);
3320a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, ah), tl);
3330a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, al), tl);
3340a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3350a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3360a39d0a697ff3603e8c100300fda363658e10b23James Zern
3370a39d0a697ff3603e8c100300fda363658e10b23James Zern  ls = vec_splat(l, 3);
3380a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, ah), tl);
3390a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, al), tl);
3400a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3410a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3420a39d0a697ff3603e8c100300fda363658e10b23James Zern
3430a39d0a697ff3603e8c100300fda363658e10b23James Zern  ls = vec_splat(l, 4);
3440a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, ah), tl);
3450a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, al), tl);
3460a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3470a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3480a39d0a697ff3603e8c100300fda363658e10b23James Zern
3490a39d0a697ff3603e8c100300fda363658e10b23James Zern  ls = vec_splat(l, 5);
3500a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, ah), tl);
3510a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, al), tl);
3520a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3530a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3540a39d0a697ff3603e8c100300fda363658e10b23James Zern
3550a39d0a697ff3603e8c100300fda363658e10b23James Zern  ls = vec_splat(l, 6);
3560a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, ah), tl);
3570a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, al), tl);
3580a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3590a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
3600a39d0a697ff3603e8c100300fda363658e10b23James Zern
3610a39d0a697ff3603e8c100300fda363658e10b23James Zern  ls = vec_splat(l, 7);
3620a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, ah), tl);
3630a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, al), tl);
3640a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3650a39d0a697ff3603e8c100300fda363658e10b23James Zern}
3660a39d0a697ff3603e8c100300fda363658e10b23James Zern
3670a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_tm_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
3680a39d0a697ff3603e8c100300fda363658e10b23James Zern                                const uint8_t *above, const uint8_t *left) {
3690a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0));
3700a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t l = vec_vsx_ld(0, left);
3710a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t lh = unpack_to_s16_h(l);
3720a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t ll = unpack_to_s16_l(l);
3730a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a = vec_vsx_ld(0, above);
3740a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t ah = unpack_to_s16_h(a);
3750a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t al = unpack_to_s16_l(a);
3760a39d0a697ff3603e8c100300fda363658e10b23James Zern
3770a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_16x8(dst, stride, lh, ah, al, tl);
3780a39d0a697ff3603e8c100300fda363658e10b23James Zern
3790a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride * 8;
3800a39d0a697ff3603e8c100300fda363658e10b23James Zern
3810a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_16x8(dst, stride, ll, ah, al, tl);
3820a39d0a697ff3603e8c100300fda363658e10b23James Zern}
3830a39d0a697ff3603e8c100300fda363658e10b23James Zern
3840a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void tm_predictor_32x1(uint8_t *dst, const int16x8_t ls,
3850a39d0a697ff3603e8c100300fda363658e10b23James Zern                                     const int16x8_t a0h, const int16x8_t a0l,
3860a39d0a697ff3603e8c100300fda363658e10b23James Zern                                     const int16x8_t a1h, const int16x8_t a1l,
3870a39d0a697ff3603e8c100300fda363658e10b23James Zern                                     const int16x8_t tl) {
3880a39d0a697ff3603e8c100300fda363658e10b23James Zern  int16x8_t vh, vl;
3890a39d0a697ff3603e8c100300fda363658e10b23James Zern
3900a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, a0h), tl);
3910a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, a0l), tl);
3920a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 0, dst);
3930a39d0a697ff3603e8c100300fda363658e10b23James Zern  vh = vec_sub(vec_add(ls, a1h), tl);
3940a39d0a697ff3603e8c100300fda363658e10b23James Zern  vl = vec_sub(vec_add(ls, a1l), tl);
3950a39d0a697ff3603e8c100300fda363658e10b23James Zern  vec_vsx_st(vec_packsu(vh, vl), 16, dst);
3960a39d0a697ff3603e8c100300fda363658e10b23James Zern}
3970a39d0a697ff3603e8c100300fda363658e10b23James Zern
3980a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic void tm_predictor_32x8(uint8_t *dst, const ptrdiff_t stride,
3990a39d0a697ff3603e8c100300fda363658e10b23James Zern                              const int16x8_t l, const uint8x16_t a0,
4000a39d0a697ff3603e8c100300fda363658e10b23James Zern                              const uint8x16_t a1, const int16x8_t tl) {
4010a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t a0h = unpack_to_s16_h(a0);
4020a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t a0l = unpack_to_s16_l(a0);
4030a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t a1h = unpack_to_s16_h(a1);
4040a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t a1l = unpack_to_s16_l(a1);
4050a39d0a697ff3603e8c100300fda363658e10b23James Zern
4060a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x1(dst, vec_splat(l, 0), a0h, a0l, a1h, a1l, tl);
4070a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
4080a39d0a697ff3603e8c100300fda363658e10b23James Zern
4090a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x1(dst, vec_splat(l, 1), a0h, a0l, a1h, a1l, tl);
4100a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
4110a39d0a697ff3603e8c100300fda363658e10b23James Zern
4120a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x1(dst, vec_splat(l, 2), a0h, a0l, a1h, a1l, tl);
4130a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
4140a39d0a697ff3603e8c100300fda363658e10b23James Zern
4150a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x1(dst, vec_splat(l, 3), a0h, a0l, a1h, a1l, tl);
4160a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
4170a39d0a697ff3603e8c100300fda363658e10b23James Zern
4180a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x1(dst, vec_splat(l, 4), a0h, a0l, a1h, a1l, tl);
4190a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
4200a39d0a697ff3603e8c100300fda363658e10b23James Zern
4210a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x1(dst, vec_splat(l, 5), a0h, a0l, a1h, a1l, tl);
4220a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
4230a39d0a697ff3603e8c100300fda363658e10b23James Zern
4240a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x1(dst, vec_splat(l, 6), a0h, a0l, a1h, a1l, tl);
4250a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride;
4260a39d0a697ff3603e8c100300fda363658e10b23James Zern
4270a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x1(dst, vec_splat(l, 7), a0h, a0l, a1h, a1l, tl);
4280a39d0a697ff3603e8c100300fda363658e10b23James Zern}
4290a39d0a697ff3603e8c100300fda363658e10b23James Zern
4300a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_tm_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
4310a39d0a697ff3603e8c100300fda363658e10b23James Zern                                const uint8_t *above, const uint8_t *left) {
4320a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0));
4330a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t l0 = vec_vsx_ld(0, left);
4340a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t l1 = vec_vsx_ld(16, left);
4350a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a0 = vec_vsx_ld(0, above);
4360a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a1 = vec_vsx_ld(16, above);
4370a39d0a697ff3603e8c100300fda363658e10b23James Zern
4380a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x8(dst, stride, unpack_to_s16_h(l0), a0, a1, tl);
4390a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride * 8;
4400a39d0a697ff3603e8c100300fda363658e10b23James Zern
4410a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x8(dst, stride, unpack_to_s16_l(l0), a0, a1, tl);
4420a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride * 8;
4430a39d0a697ff3603e8c100300fda363658e10b23James Zern
4440a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x8(dst, stride, unpack_to_s16_h(l1), a0, a1, tl);
4450a39d0a697ff3603e8c100300fda363658e10b23James Zern  dst += stride * 8;
4460a39d0a697ff3603e8c100300fda363658e10b23James Zern
4470a39d0a697ff3603e8c100300fda363658e10b23James Zern  tm_predictor_32x8(dst, stride, unpack_to_s16_l(l1), a0, a1, tl);
4480a39d0a697ff3603e8c100300fda363658e10b23James Zern}
4490a39d0a697ff3603e8c100300fda363658e10b23James Zern
4500a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void dc_fill_predictor_8x8(uint8_t *dst, const ptrdiff_t stride,
4510a39d0a697ff3603e8c100300fda363658e10b23James Zern                                         const uint8x16_t val) {
4520a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
4530a39d0a697ff3603e8c100300fda363658e10b23James Zern
4540a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 8; i++, dst += stride) {
4550a39d0a697ff3603e8c100300fda363658e10b23James Zern    const uint8x16_t d = vec_vsx_ld(0, dst);
4560a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(xxpermdi(val, d, 1), 0, dst);
4570a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
4580a39d0a697ff3603e8c100300fda363658e10b23James Zern}
4590a39d0a697ff3603e8c100300fda363658e10b23James Zern
4600a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void dc_fill_predictor_16x16(uint8_t *dst, const ptrdiff_t stride,
4610a39d0a697ff3603e8c100300fda363658e10b23James Zern                                           const uint8x16_t val) {
4620a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
4630a39d0a697ff3603e8c100300fda363658e10b23James Zern
4640a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 16; i++, dst += stride) {
4650a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(val, 0, dst);
4660a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
4670a39d0a697ff3603e8c100300fda363658e10b23James Zern}
4680a39d0a697ff3603e8c100300fda363658e10b23James Zern
4690a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_128_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
4700a39d0a697ff3603e8c100300fda363658e10b23James Zern                                    const uint8_t *above, const uint8_t *left) {
4710a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v128 = vec_sl(vec_splat_u8(1), vec_splat_u8(7));
4720a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)above;
4730a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
4740a39d0a697ff3603e8c100300fda363658e10b23James Zern
4750a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_16x16(dst, stride, v128);
4760a39d0a697ff3603e8c100300fda363658e10b23James Zern}
4770a39d0a697ff3603e8c100300fda363658e10b23James Zern
4780a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void dc_fill_predictor_32x32(uint8_t *dst, const ptrdiff_t stride,
4790a39d0a697ff3603e8c100300fda363658e10b23James Zern                                           const uint8x16_t val) {
4800a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
4810a39d0a697ff3603e8c100300fda363658e10b23James Zern
4820a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 32; i++, dst += stride) {
4830a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(val, 0, dst);
4840a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(val, 16, dst);
4850a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
4860a39d0a697ff3603e8c100300fda363658e10b23James Zern}
4870a39d0a697ff3603e8c100300fda363658e10b23James Zern
4880a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_128_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
4890a39d0a697ff3603e8c100300fda363658e10b23James Zern                                    const uint8_t *above, const uint8_t *left) {
4900a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v128 = vec_sl(vec_splat_u8(1), vec_splat_u8(7));
4910a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)above;
4920a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
4930a39d0a697ff3603e8c100300fda363658e10b23James Zern
4940a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_32x32(dst, stride, v128);
4950a39d0a697ff3603e8c100300fda363658e10b23James Zern}
4960a39d0a697ff3603e8c100300fda363658e10b23James Zern
4970a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t avg16(const uint8_t *values) {
4980a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t sum4s =
4990a39d0a697ff3603e8c100300fda363658e10b23James Zern      (int32x4_t)vec_sum4s(vec_vsx_ld(0, values), vec_splat_u32(0));
5000a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, vec_splat_s32(8));
5010a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(4));
5020a39d0a697ff3603e8c100300fda363658e10b23James Zern
5030a39d0a697ff3603e8c100300fda363658e10b23James Zern  return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)),
5040a39d0a697ff3603e8c100300fda363658e10b23James Zern                   3);
5050a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5060a39d0a697ff3603e8c100300fda363658e10b23James Zern
5070a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_left_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
5080a39d0a697ff3603e8c100300fda363658e10b23James Zern                                     const uint8_t *above,
5090a39d0a697ff3603e8c100300fda363658e10b23James Zern                                     const uint8_t *left) {
5100a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)above;
5110a39d0a697ff3603e8c100300fda363658e10b23James Zern
5120a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_16x16(dst, stride, avg16(left));
5130a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5140a39d0a697ff3603e8c100300fda363658e10b23James Zern
5150a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_top_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
5160a39d0a697ff3603e8c100300fda363658e10b23James Zern                                    const uint8_t *above, const uint8_t *left) {
5170a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
5180a39d0a697ff3603e8c100300fda363658e10b23James Zern
5190a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_16x16(dst, stride, avg16(above));
5200a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5210a39d0a697ff3603e8c100300fda363658e10b23James Zern
5220a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t avg32(const uint8_t *values) {
5230a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v0 = vec_vsx_ld(0, values);
5240a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t v1 = vec_vsx_ld(16, values);
5250a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t v16 = vec_sl(vec_splat_s32(1), vec_splat_u32(4));
5260a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t sum4s =
5270a39d0a697ff3603e8c100300fda363658e10b23James Zern      (int32x4_t)vec_sum4s(v0, vec_sum4s(v1, vec_splat_u32(0)));
5280a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v16);
5290a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(5));
5300a39d0a697ff3603e8c100300fda363658e10b23James Zern
5310a39d0a697ff3603e8c100300fda363658e10b23James Zern  return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)),
5320a39d0a697ff3603e8c100300fda363658e10b23James Zern                   3);
5330a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5340a39d0a697ff3603e8c100300fda363658e10b23James Zern
5350a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_left_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
5360a39d0a697ff3603e8c100300fda363658e10b23James Zern                                     const uint8_t *above,
5370a39d0a697ff3603e8c100300fda363658e10b23James Zern                                     const uint8_t *left) {
5380a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)above;
5390a39d0a697ff3603e8c100300fda363658e10b23James Zern
5400a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_32x32(dst, stride, avg32(left));
5410a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5420a39d0a697ff3603e8c100300fda363658e10b23James Zern
5430a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_top_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
5440a39d0a697ff3603e8c100300fda363658e10b23James Zern                                    const uint8_t *above, const uint8_t *left) {
5450a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
5460a39d0a697ff3603e8c100300fda363658e10b23James Zern
5470a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_32x32(dst, stride, avg32(above));
5480a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5490a39d0a697ff3603e8c100300fda363658e10b23James Zern
5500a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t dc_avg8(const uint8_t *above, const uint8_t *left) {
5510a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a0 = vec_vsx_ld(0, above);
5520a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t l0 = vec_vsx_ld(0, left);
5530a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t sum4s =
5540a39d0a697ff3603e8c100300fda363658e10b23James Zern      (int32x4_t)vec_sum4s(l0, vec_sum4s(a0, vec_splat_u32(0)));
5550a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t sum4s8 = xxpermdi(sum4s, vec_splat_s32(0), 1);
5560a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s8, vec_splat_s32(8));
5570a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(4));
5580a39d0a697ff3603e8c100300fda363658e10b23James Zern
5590a39d0a697ff3603e8c100300fda363658e10b23James Zern  return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)),
5600a39d0a697ff3603e8c100300fda363658e10b23James Zern                   3);
5610a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5620a39d0a697ff3603e8c100300fda363658e10b23James Zern
5630a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t dc_avg16(const uint8_t *above, const uint8_t *left) {
5640a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a0 = vec_vsx_ld(0, above);
5650a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t l0 = vec_vsx_ld(0, left);
5660a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t v16 = vec_sl(vec_splat_s32(1), vec_splat_u32(4));
5670a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t sum4s =
5680a39d0a697ff3603e8c100300fda363658e10b23James Zern      (int32x4_t)vec_sum4s(l0, vec_sum4s(a0, vec_splat_u32(0)));
5690a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v16);
5700a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(5));
5710a39d0a697ff3603e8c100300fda363658e10b23James Zern
5720a39d0a697ff3603e8c100300fda363658e10b23James Zern  return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)),
5730a39d0a697ff3603e8c100300fda363658e10b23James Zern                   3);
5740a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5750a39d0a697ff3603e8c100300fda363658e10b23James Zern
5760a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride,
5770a39d0a697ff3603e8c100300fda363658e10b23James Zern                              const uint8_t *above, const uint8_t *left) {
5780a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_8x8(dst, stride, dc_avg8(above, left));
5790a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5800a39d0a697ff3603e8c100300fda363658e10b23James Zern
5810a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
5820a39d0a697ff3603e8c100300fda363658e10b23James Zern                                const uint8_t *above, const uint8_t *left) {
5830a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_16x16(dst, stride, dc_avg16(above, left));
5840a39d0a697ff3603e8c100300fda363658e10b23James Zern}
5850a39d0a697ff3603e8c100300fda363658e10b23James Zern
5860a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t dc_avg32(const uint8_t *above, const uint8_t *left) {
5870a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a0 = vec_vsx_ld(0, above);
5880a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a1 = vec_vsx_ld(16, above);
5890a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t l0 = vec_vsx_ld(0, left);
5900a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t l1 = vec_vsx_ld(16, left);
5910a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t v32 = vec_sl(vec_splat_s32(1), vec_splat_u32(5));
5920a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t a_sum = vec_sum4s(a0, vec_sum4s(a1, vec_splat_u32(0)));
5930a39d0a697ff3603e8c100300fda363658e10b23James Zern  const int32x4_t sum4s = (int32x4_t)vec_sum4s(l0, vec_sum4s(l1, a_sum));
5940a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v32);
5950a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(6));
5960a39d0a697ff3603e8c100300fda363658e10b23James Zern
5970a39d0a697ff3603e8c100300fda363658e10b23James Zern  return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)),
5980a39d0a697ff3603e8c100300fda363658e10b23James Zern                   3);
5990a39d0a697ff3603e8c100300fda363658e10b23James Zern}
6000a39d0a697ff3603e8c100300fda363658e10b23James Zern
6010a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
6020a39d0a697ff3603e8c100300fda363658e10b23James Zern                                const uint8_t *above, const uint8_t *left) {
6030a39d0a697ff3603e8c100300fda363658e10b23James Zern  dc_fill_predictor_32x32(dst, stride, dc_avg32(above, left));
6040a39d0a697ff3603e8c100300fda363658e10b23James Zern}
6050a39d0a697ff3603e8c100300fda363658e10b23James Zern
6060a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t avg3(const uint8x16_t a, const uint8x16_t b,
6070a39d0a697ff3603e8c100300fda363658e10b23James Zern                       const uint8x16_t c) {
6080a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t ac =
6090a39d0a697ff3603e8c100300fda363658e10b23James Zern      vec_adds(vec_and(a, c), vec_sr(vec_xor(a, c), vec_splat_u8(1)));
6100a39d0a697ff3603e8c100300fda363658e10b23James Zern
6110a39d0a697ff3603e8c100300fda363658e10b23James Zern  return vec_avg(ac, b);
6120a39d0a697ff3603e8c100300fda363658e10b23James Zern}
6130a39d0a697ff3603e8c100300fda363658e10b23James Zern
6140a39d0a697ff3603e8c100300fda363658e10b23James Zern// Workaround vec_sld/vec_xxsldi/vec_lsdoi being missing or broken.
6150a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic const uint8x16_t sl1 = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
6160a39d0a697ff3603e8c100300fda363658e10b23James Zern                                0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10 };
6170a39d0a697ff3603e8c100300fda363658e10b23James Zern
6180a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d45_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride,
6190a39d0a697ff3603e8c100300fda363658e10b23James Zern                               const uint8_t *above, const uint8_t *left) {
6200a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t af = vec_vsx_ld(0, above);
6210a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t above_right = vec_splat(af, 7);
6220a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a = xxpermdi(af, above_right, 1);
6230a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t b = vec_perm(a, above_right, sl1);
6240a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t c = vec_perm(b, above_right, sl1);
6250a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row = avg3(a, b, c);
6260a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
6270a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
6280a39d0a697ff3603e8c100300fda363658e10b23James Zern
6290a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 8; i++) {
6300a39d0a697ff3603e8c100300fda363658e10b23James Zern    const uint8x16_t d = vec_vsx_ld(0, dst);
6310a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(xxpermdi(row, d, 1), 0, dst);
6320a39d0a697ff3603e8c100300fda363658e10b23James Zern    dst += stride;
6330a39d0a697ff3603e8c100300fda363658e10b23James Zern    row = vec_perm(row, above_right, sl1);
6340a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
6350a39d0a697ff3603e8c100300fda363658e10b23James Zern}
6360a39d0a697ff3603e8c100300fda363658e10b23James Zern
6370a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d45_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
6380a39d0a697ff3603e8c100300fda363658e10b23James Zern                                 const uint8_t *above, const uint8_t *left) {
6390a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a = vec_vsx_ld(0, above);
6400a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t above_right = vec_splat(a, 15);
6410a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t b = vec_perm(a, above_right, sl1);
6420a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t c = vec_perm(b, above_right, sl1);
6430a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row = avg3(a, b, c);
6440a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
6450a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
6460a39d0a697ff3603e8c100300fda363658e10b23James Zern
6470a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 16; i++) {
6480a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row, 0, dst);
6490a39d0a697ff3603e8c100300fda363658e10b23James Zern    dst += stride;
6500a39d0a697ff3603e8c100300fda363658e10b23James Zern    row = vec_perm(row, above_right, sl1);
6510a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
6520a39d0a697ff3603e8c100300fda363658e10b23James Zern}
6530a39d0a697ff3603e8c100300fda363658e10b23James Zern
6540a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d45_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
6550a39d0a697ff3603e8c100300fda363658e10b23James Zern                                 const uint8_t *above, const uint8_t *left) {
6560a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a0 = vec_vsx_ld(0, above);
6570a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a1 = vec_vsx_ld(16, above);
6580a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t above_right = vec_splat(a1, 15);
6590a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t b0 = vec_perm(a0, a1, sl1);
6600a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t b1 = vec_perm(a1, above_right, sl1);
6610a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t c0 = vec_perm(b0, b1, sl1);
6620a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t c1 = vec_perm(b1, above_right, sl1);
6630a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row0 = avg3(a0, b0, c0);
6640a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row1 = avg3(a1, b1, c1);
6650a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
6660a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
6670a39d0a697ff3603e8c100300fda363658e10b23James Zern
6680a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 32; i++) {
6690a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row0, 0, dst);
6700a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row1, 16, dst);
6710a39d0a697ff3603e8c100300fda363658e10b23James Zern    dst += stride;
6720a39d0a697ff3603e8c100300fda363658e10b23James Zern    row0 = vec_perm(row0, row1, sl1);
6730a39d0a697ff3603e8c100300fda363658e10b23James Zern    row1 = vec_perm(row1, above_right, sl1);
6740a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
6750a39d0a697ff3603e8c100300fda363658e10b23James Zern}
6760a39d0a697ff3603e8c100300fda363658e10b23James Zern
6770a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d63_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride,
6780a39d0a697ff3603e8c100300fda363658e10b23James Zern                               const uint8_t *above, const uint8_t *left) {
6790a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t af = vec_vsx_ld(0, above);
6800a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t above_right = vec_splat(af, 9);
6810a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a = xxpermdi(af, above_right, 1);
6820a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t b = vec_perm(a, above_right, sl1);
6830a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t c = vec_perm(b, above_right, sl1);
6840a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row0 = vec_avg(a, b);
6850a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row1 = avg3(a, b, c);
6860a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
6870a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
6880a39d0a697ff3603e8c100300fda363658e10b23James Zern
6890a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 4; i++) {
6900a39d0a697ff3603e8c100300fda363658e10b23James Zern    const uint8x16_t d0 = vec_vsx_ld(0, dst);
6910a39d0a697ff3603e8c100300fda363658e10b23James Zern    const uint8x16_t d1 = vec_vsx_ld(0, dst + stride);
6920a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(xxpermdi(row0, d0, 1), 0, dst);
6930a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(xxpermdi(row1, d1, 1), 0, dst + stride);
6940a39d0a697ff3603e8c100300fda363658e10b23James Zern    dst += stride * 2;
6950a39d0a697ff3603e8c100300fda363658e10b23James Zern    row0 = vec_perm(row0, above_right, sl1);
6960a39d0a697ff3603e8c100300fda363658e10b23James Zern    row1 = vec_perm(row1, above_right, sl1);
6970a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
6980a39d0a697ff3603e8c100300fda363658e10b23James Zern}
6990a39d0a697ff3603e8c100300fda363658e10b23James Zern
7000a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d63_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
7010a39d0a697ff3603e8c100300fda363658e10b23James Zern                                 const uint8_t *above, const uint8_t *left) {
7020a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a0 = vec_vsx_ld(0, above);
7030a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a1 = vec_vsx_ld(16, above);
7040a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t above_right = vec_splat(a1, 0);
7050a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t b = vec_perm(a0, above_right, sl1);
7060a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t c = vec_perm(b, above_right, sl1);
7070a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row0 = vec_avg(a0, b);
7080a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row1 = avg3(a0, b, c);
7090a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
7100a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
7110a39d0a697ff3603e8c100300fda363658e10b23James Zern
7120a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 8; i++) {
7130a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row0, 0, dst);
7140a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row1, 0, dst + stride);
7150a39d0a697ff3603e8c100300fda363658e10b23James Zern    dst += stride * 2;
7160a39d0a697ff3603e8c100300fda363658e10b23James Zern    row0 = vec_perm(row0, above_right, sl1);
7170a39d0a697ff3603e8c100300fda363658e10b23James Zern    row1 = vec_perm(row1, above_right, sl1);
7180a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
7190a39d0a697ff3603e8c100300fda363658e10b23James Zern}
7200a39d0a697ff3603e8c100300fda363658e10b23James Zern
7210a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d63_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride,
7220a39d0a697ff3603e8c100300fda363658e10b23James Zern                                 const uint8_t *above, const uint8_t *left) {
7230a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a0 = vec_vsx_ld(0, above);
7240a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a1 = vec_vsx_ld(16, above);
7250a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t a2 = vec_vsx_ld(32, above);
7260a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t above_right = vec_splat(a2, 0);
7270a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t b0 = vec_perm(a0, a1, sl1);
7280a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t b1 = vec_perm(a1, above_right, sl1);
7290a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t c0 = vec_perm(b0, b1, sl1);
7300a39d0a697ff3603e8c100300fda363658e10b23James Zern  const uint8x16_t c1 = vec_perm(b1, above_right, sl1);
7310a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row0_0 = vec_avg(a0, b0);
7320a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row0_1 = vec_avg(a1, b1);
7330a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row1_0 = avg3(a0, b0, c0);
7340a39d0a697ff3603e8c100300fda363658e10b23James Zern  uint8x16_t row1_1 = avg3(a1, b1, c1);
7350a39d0a697ff3603e8c100300fda363658e10b23James Zern  int i;
7360a39d0a697ff3603e8c100300fda363658e10b23James Zern  (void)left;
7370a39d0a697ff3603e8c100300fda363658e10b23James Zern
7380a39d0a697ff3603e8c100300fda363658e10b23James Zern  for (i = 0; i < 16; i++) {
7390a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row0_0, 0, dst);
7400a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row0_1, 16, dst);
7410a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row1_0, 0, dst + stride);
7420a39d0a697ff3603e8c100300fda363658e10b23James Zern    vec_vsx_st(row1_1, 16, dst + stride);
7430a39d0a697ff3603e8c100300fda363658e10b23James Zern    dst += stride * 2;
7440a39d0a697ff3603e8c100300fda363658e10b23James Zern    row0_0 = vec_perm(row0_0, row0_1, sl1);
7450a39d0a697ff3603e8c100300fda363658e10b23James Zern    row0_1 = vec_perm(row0_1, above_right, sl1);
7460a39d0a697ff3603e8c100300fda363658e10b23James Zern    row1_0 = vec_perm(row1_0, row1_1, sl1);
7470a39d0a697ff3603e8c100300fda363658e10b23James Zern    row1_1 = vec_perm(row1_1, above_right, sl1);
7480a39d0a697ff3603e8c100300fda363658e10b23James Zern  }
7490a39d0a697ff3603e8c100300fda363658e10b23James Zern}
750