10a39d0a697ff3603e8c100300fda363658e10b23James Zern/* 20a39d0a697ff3603e8c100300fda363658e10b23James Zern * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 30a39d0a697ff3603e8c100300fda363658e10b23James Zern * 40a39d0a697ff3603e8c100300fda363658e10b23James Zern * Use of this source code is governed by a BSD-style license 50a39d0a697ff3603e8c100300fda363658e10b23James Zern * that can be found in the LICENSE file in the root of the source 60a39d0a697ff3603e8c100300fda363658e10b23James Zern * tree. An additional intellectual property rights grant can be found 70a39d0a697ff3603e8c100300fda363658e10b23James Zern * in the file PATENTS. All contributing project authors may 80a39d0a697ff3603e8c100300fda363658e10b23James Zern * be found in the AUTHORS file in the root of the source tree. 90a39d0a697ff3603e8c100300fda363658e10b23James Zern */ 100a39d0a697ff3603e8c100300fda363658e10b23James Zern 110a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "./vpx_dsp_rtcd.h" 120a39d0a697ff3603e8c100300fda363658e10b23James Zern#include "vpx_dsp/ppc/types_vsx.h" 130a39d0a697ff3603e8c100300fda363658e10b23James Zern 140a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_v_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 150a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 160a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d = vec_vsx_ld(0, above); 170a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 180a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 190a39d0a697ff3603e8c100300fda363658e10b23James Zern 200a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 16; i++, dst += stride) { 210a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(d, 0, dst); 220a39d0a697ff3603e8c100300fda363658e10b23James Zern } 230a39d0a697ff3603e8c100300fda363658e10b23James Zern} 240a39d0a697ff3603e8c100300fda363658e10b23James Zern 250a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_v_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 260a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 270a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d0 = vec_vsx_ld(0, above); 280a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d1 = vec_vsx_ld(16, above); 290a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 300a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 310a39d0a697ff3603e8c100300fda363658e10b23James Zern 320a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 32; i++, dst += stride) { 330a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(d0, 0, dst); 340a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(d1, 16, dst); 350a39d0a697ff3603e8c100300fda363658e10b23James Zern } 360a39d0a697ff3603e8c100300fda363658e10b23James Zern} 370a39d0a697ff3603e8c100300fda363658e10b23James Zern 380a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic const uint32x4_t mask4 = { 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; 390a39d0a697ff3603e8c100300fda363658e10b23James Zern 400a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_h_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride, 410a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 420a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d = vec_vsx_ld(0, left); 430a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v0 = vec_splat(d, 0); 440a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v1 = vec_splat(d, 1); 450a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v2 = vec_splat(d, 2); 460a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v3 = vec_splat(d, 3); 470a39d0a697ff3603e8c100300fda363658e10b23James Zern 480a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)above; 490a39d0a697ff3603e8c100300fda363658e10b23James Zern 500a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_sel(v0, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst); 510a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 520a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_sel(v1, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst); 530a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 540a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_sel(v2, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst); 550a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 560a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_sel(v3, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst); 570a39d0a697ff3603e8c100300fda363658e10b23James Zern} 580a39d0a697ff3603e8c100300fda363658e10b23James Zern 590a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_h_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, 600a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 610a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d = vec_vsx_ld(0, left); 620a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v0 = vec_splat(d, 0); 630a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v1 = vec_splat(d, 1); 640a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v2 = vec_splat(d, 2); 650a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v3 = vec_splat(d, 3); 660a39d0a697ff3603e8c100300fda363658e10b23James Zern 670a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v4 = vec_splat(d, 4); 680a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v5 = vec_splat(d, 5); 690a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v6 = vec_splat(d, 6); 700a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v7 = vec_splat(d, 7); 710a39d0a697ff3603e8c100300fda363658e10b23James Zern 720a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)above; 730a39d0a697ff3603e8c100300fda363658e10b23James Zern 740a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(v0, vec_vsx_ld(0, dst), 1), 0, dst); 750a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 760a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(v1, vec_vsx_ld(0, dst), 1), 0, dst); 770a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 780a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(v2, vec_vsx_ld(0, dst), 1), 0, dst); 790a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 800a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(v3, vec_vsx_ld(0, dst), 1), 0, dst); 810a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 820a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(v4, vec_vsx_ld(0, dst), 1), 0, dst); 830a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 840a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(v5, vec_vsx_ld(0, dst), 1), 0, dst); 850a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 860a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(v6, vec_vsx_ld(0, dst), 1), 0, dst); 870a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 880a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(v7, vec_vsx_ld(0, dst), 1), 0, dst); 890a39d0a697ff3603e8c100300fda363658e10b23James Zern} 900a39d0a697ff3603e8c100300fda363658e10b23James Zern 910a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 920a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 930a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d = vec_vsx_ld(0, left); 940a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v0 = vec_splat(d, 0); 950a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v1 = vec_splat(d, 1); 960a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v2 = vec_splat(d, 2); 970a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v3 = vec_splat(d, 3); 980a39d0a697ff3603e8c100300fda363658e10b23James Zern 990a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v4 = vec_splat(d, 4); 1000a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v5 = vec_splat(d, 5); 1010a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v6 = vec_splat(d, 6); 1020a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v7 = vec_splat(d, 7); 1030a39d0a697ff3603e8c100300fda363658e10b23James Zern 1040a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v8 = vec_splat(d, 8); 1050a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v9 = vec_splat(d, 9); 1060a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v10 = vec_splat(d, 10); 1070a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v11 = vec_splat(d, 11); 1080a39d0a697ff3603e8c100300fda363658e10b23James Zern 1090a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v12 = vec_splat(d, 12); 1100a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v13 = vec_splat(d, 13); 1110a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v14 = vec_splat(d, 14); 1120a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v15 = vec_splat(d, 15); 1130a39d0a697ff3603e8c100300fda363658e10b23James Zern 1140a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)above; 1150a39d0a697ff3603e8c100300fda363658e10b23James Zern 1160a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v0, 0, dst); 1170a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1180a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v1, 0, dst); 1190a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1200a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v2, 0, dst); 1210a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1220a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v3, 0, dst); 1230a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1240a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v4, 0, dst); 1250a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1260a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v5, 0, dst); 1270a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1280a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v6, 0, dst); 1290a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1300a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v7, 0, dst); 1310a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1320a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v8, 0, dst); 1330a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1340a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v9, 0, dst); 1350a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1360a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v10, 0, dst); 1370a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1380a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v11, 0, dst); 1390a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1400a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v12, 0, dst); 1410a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1420a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v13, 0, dst); 1430a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1440a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v14, 0, dst); 1450a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 1460a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v15, 0, dst); 1470a39d0a697ff3603e8c100300fda363658e10b23James Zern} 1480a39d0a697ff3603e8c100300fda363658e10b23James Zern 1490a39d0a697ff3603e8c100300fda363658e10b23James Zern#define H_PREDICTOR_32(v) \ 1500a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v, 0, dst); \ 1510a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(v, 16, dst); \ 1520a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride 1530a39d0a697ff3603e8c100300fda363658e10b23James Zern 1540a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_h_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 1550a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 1560a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d0 = vec_vsx_ld(0, left); 1570a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d1 = vec_vsx_ld(16, left); 1580a39d0a697ff3603e8c100300fda363658e10b23James Zern 1590a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v0_0 = vec_splat(d0, 0); 1600a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v1_0 = vec_splat(d0, 1); 1610a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v2_0 = vec_splat(d0, 2); 1620a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v3_0 = vec_splat(d0, 3); 1630a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v4_0 = vec_splat(d0, 4); 1640a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v5_0 = vec_splat(d0, 5); 1650a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v6_0 = vec_splat(d0, 6); 1660a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v7_0 = vec_splat(d0, 7); 1670a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v8_0 = vec_splat(d0, 8); 1680a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v9_0 = vec_splat(d0, 9); 1690a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v10_0 = vec_splat(d0, 10); 1700a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v11_0 = vec_splat(d0, 11); 1710a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v12_0 = vec_splat(d0, 12); 1720a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v13_0 = vec_splat(d0, 13); 1730a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v14_0 = vec_splat(d0, 14); 1740a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v15_0 = vec_splat(d0, 15); 1750a39d0a697ff3603e8c100300fda363658e10b23James Zern 1760a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v0_1 = vec_splat(d1, 0); 1770a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v1_1 = vec_splat(d1, 1); 1780a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v2_1 = vec_splat(d1, 2); 1790a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v3_1 = vec_splat(d1, 3); 1800a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v4_1 = vec_splat(d1, 4); 1810a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v5_1 = vec_splat(d1, 5); 1820a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v6_1 = vec_splat(d1, 6); 1830a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v7_1 = vec_splat(d1, 7); 1840a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v8_1 = vec_splat(d1, 8); 1850a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v9_1 = vec_splat(d1, 9); 1860a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v10_1 = vec_splat(d1, 10); 1870a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v11_1 = vec_splat(d1, 11); 1880a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v12_1 = vec_splat(d1, 12); 1890a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v13_1 = vec_splat(d1, 13); 1900a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v14_1 = vec_splat(d1, 14); 1910a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v15_1 = vec_splat(d1, 15); 1920a39d0a697ff3603e8c100300fda363658e10b23James Zern 1930a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)above; 1940a39d0a697ff3603e8c100300fda363658e10b23James Zern 1950a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v0_0); 1960a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v1_0); 1970a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v2_0); 1980a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v3_0); 1990a39d0a697ff3603e8c100300fda363658e10b23James Zern 2000a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v4_0); 2010a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v5_0); 2020a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v6_0); 2030a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v7_0); 2040a39d0a697ff3603e8c100300fda363658e10b23James Zern 2050a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v8_0); 2060a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v9_0); 2070a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v10_0); 2080a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v11_0); 2090a39d0a697ff3603e8c100300fda363658e10b23James Zern 2100a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v12_0); 2110a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v13_0); 2120a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v14_0); 2130a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v15_0); 2140a39d0a697ff3603e8c100300fda363658e10b23James Zern 2150a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v0_1); 2160a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v1_1); 2170a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v2_1); 2180a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v3_1); 2190a39d0a697ff3603e8c100300fda363658e10b23James Zern 2200a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v4_1); 2210a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v5_1); 2220a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v6_1); 2230a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v7_1); 2240a39d0a697ff3603e8c100300fda363658e10b23James Zern 2250a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v8_1); 2260a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v9_1); 2270a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v10_1); 2280a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v11_1); 2290a39d0a697ff3603e8c100300fda363658e10b23James Zern 2300a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v12_1); 2310a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v13_1); 2320a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v14_1); 2330a39d0a697ff3603e8c100300fda363658e10b23James Zern H_PREDICTOR_32(v15_1); 2340a39d0a697ff3603e8c100300fda363658e10b23James Zern} 2350a39d0a697ff3603e8c100300fda363658e10b23James Zern 2360a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_tm_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride, 2370a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 2380a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); 2390a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t l = unpack_to_s16_h(vec_vsx_ld(0, left)); 2400a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t a = unpack_to_s16_h(vec_vsx_ld(0, above)); 2410a39d0a697ff3603e8c100300fda363658e10b23James Zern int16x8_t tmp, val; 2420a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t d; 2430a39d0a697ff3603e8c100300fda363658e10b23James Zern 2440a39d0a697ff3603e8c100300fda363658e10b23James Zern d = vec_vsx_ld(0, dst); 2450a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(d); 2460a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 0), a), tl); 2470a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst); 2480a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 2490a39d0a697ff3603e8c100300fda363658e10b23James Zern 2500a39d0a697ff3603e8c100300fda363658e10b23James Zern d = vec_vsx_ld(0, dst); 2510a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(d); 2520a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 1), a), tl); 2530a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst); 2540a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 2550a39d0a697ff3603e8c100300fda363658e10b23James Zern 2560a39d0a697ff3603e8c100300fda363658e10b23James Zern d = vec_vsx_ld(0, dst); 2570a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(d); 2580a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 2), a), tl); 2590a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst); 2600a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 2610a39d0a697ff3603e8c100300fda363658e10b23James Zern 2620a39d0a697ff3603e8c100300fda363658e10b23James Zern d = vec_vsx_ld(0, dst); 2630a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(d); 2640a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 3), a), tl); 2650a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst); 2660a39d0a697ff3603e8c100300fda363658e10b23James Zern} 2670a39d0a697ff3603e8c100300fda363658e10b23James Zern 2680a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_tm_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, 2690a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 2700a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); 2710a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t l = unpack_to_s16_h(vec_vsx_ld(0, left)); 2720a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t a = unpack_to_s16_h(vec_vsx_ld(0, above)); 2730a39d0a697ff3603e8c100300fda363658e10b23James Zern int16x8_t tmp, val; 2740a39d0a697ff3603e8c100300fda363658e10b23James Zern 2750a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); 2760a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 0), a), tl); 2770a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(val, tmp), 0, dst); 2780a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 2790a39d0a697ff3603e8c100300fda363658e10b23James Zern 2800a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); 2810a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 1), a), tl); 2820a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(val, tmp), 0, dst); 2830a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 2840a39d0a697ff3603e8c100300fda363658e10b23James Zern 2850a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); 2860a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 2), a), tl); 2870a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(val, tmp), 0, dst); 2880a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 2890a39d0a697ff3603e8c100300fda363658e10b23James Zern 2900a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); 2910a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 3), a), tl); 2920a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(val, tmp), 0, dst); 2930a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 2940a39d0a697ff3603e8c100300fda363658e10b23James Zern 2950a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); 2960a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 4), a), tl); 2970a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(val, tmp), 0, dst); 2980a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 2990a39d0a697ff3603e8c100300fda363658e10b23James Zern 3000a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); 3010a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 5), a), tl); 3020a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(val, tmp), 0, dst); 3030a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3040a39d0a697ff3603e8c100300fda363658e10b23James Zern 3050a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); 3060a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 6), a), tl); 3070a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(val, tmp), 0, dst); 3080a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3090a39d0a697ff3603e8c100300fda363658e10b23James Zern 3100a39d0a697ff3603e8c100300fda363658e10b23James Zern tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); 3110a39d0a697ff3603e8c100300fda363658e10b23James Zern val = vec_sub(vec_add(vec_splat(l, 7), a), tl); 3120a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(val, tmp), 0, dst); 3130a39d0a697ff3603e8c100300fda363658e10b23James Zern} 3140a39d0a697ff3603e8c100300fda363658e10b23James Zern 3150a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic void tm_predictor_16x8(uint8_t *dst, const ptrdiff_t stride, int16x8_t l, 3160a39d0a697ff3603e8c100300fda363658e10b23James Zern int16x8_t ah, int16x8_t al, int16x8_t tl) { 3170a39d0a697ff3603e8c100300fda363658e10b23James Zern int16x8_t vh, vl, ls; 3180a39d0a697ff3603e8c100300fda363658e10b23James Zern 3190a39d0a697ff3603e8c100300fda363658e10b23James Zern ls = vec_splat(l, 0); 3200a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, ah), tl); 3210a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, al), tl); 3220a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3230a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3240a39d0a697ff3603e8c100300fda363658e10b23James Zern 3250a39d0a697ff3603e8c100300fda363658e10b23James Zern ls = vec_splat(l, 1); 3260a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, ah), tl); 3270a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, al), tl); 3280a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3290a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3300a39d0a697ff3603e8c100300fda363658e10b23James Zern 3310a39d0a697ff3603e8c100300fda363658e10b23James Zern ls = vec_splat(l, 2); 3320a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, ah), tl); 3330a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, al), tl); 3340a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3350a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3360a39d0a697ff3603e8c100300fda363658e10b23James Zern 3370a39d0a697ff3603e8c100300fda363658e10b23James Zern ls = vec_splat(l, 3); 3380a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, ah), tl); 3390a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, al), tl); 3400a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3410a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3420a39d0a697ff3603e8c100300fda363658e10b23James Zern 3430a39d0a697ff3603e8c100300fda363658e10b23James Zern ls = vec_splat(l, 4); 3440a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, ah), tl); 3450a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, al), tl); 3460a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3470a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3480a39d0a697ff3603e8c100300fda363658e10b23James Zern 3490a39d0a697ff3603e8c100300fda363658e10b23James Zern ls = vec_splat(l, 5); 3500a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, ah), tl); 3510a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, al), tl); 3520a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3530a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3540a39d0a697ff3603e8c100300fda363658e10b23James Zern 3550a39d0a697ff3603e8c100300fda363658e10b23James Zern ls = vec_splat(l, 6); 3560a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, ah), tl); 3570a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, al), tl); 3580a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3590a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 3600a39d0a697ff3603e8c100300fda363658e10b23James Zern 3610a39d0a697ff3603e8c100300fda363658e10b23James Zern ls = vec_splat(l, 7); 3620a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, ah), tl); 3630a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, al), tl); 3640a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3650a39d0a697ff3603e8c100300fda363658e10b23James Zern} 3660a39d0a697ff3603e8c100300fda363658e10b23James Zern 3670a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_tm_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 3680a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 3690a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); 3700a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t l = vec_vsx_ld(0, left); 3710a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t lh = unpack_to_s16_h(l); 3720a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t ll = unpack_to_s16_l(l); 3730a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a = vec_vsx_ld(0, above); 3740a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t ah = unpack_to_s16_h(a); 3750a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t al = unpack_to_s16_l(a); 3760a39d0a697ff3603e8c100300fda363658e10b23James Zern 3770a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_16x8(dst, stride, lh, ah, al, tl); 3780a39d0a697ff3603e8c100300fda363658e10b23James Zern 3790a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride * 8; 3800a39d0a697ff3603e8c100300fda363658e10b23James Zern 3810a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_16x8(dst, stride, ll, ah, al, tl); 3820a39d0a697ff3603e8c100300fda363658e10b23James Zern} 3830a39d0a697ff3603e8c100300fda363658e10b23James Zern 3840a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void tm_predictor_32x1(uint8_t *dst, const int16x8_t ls, 3850a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t a0h, const int16x8_t a0l, 3860a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t a1h, const int16x8_t a1l, 3870a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t tl) { 3880a39d0a697ff3603e8c100300fda363658e10b23James Zern int16x8_t vh, vl; 3890a39d0a697ff3603e8c100300fda363658e10b23James Zern 3900a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, a0h), tl); 3910a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, a0l), tl); 3920a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 0, dst); 3930a39d0a697ff3603e8c100300fda363658e10b23James Zern vh = vec_sub(vec_add(ls, a1h), tl); 3940a39d0a697ff3603e8c100300fda363658e10b23James Zern vl = vec_sub(vec_add(ls, a1l), tl); 3950a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(vec_packsu(vh, vl), 16, dst); 3960a39d0a697ff3603e8c100300fda363658e10b23James Zern} 3970a39d0a697ff3603e8c100300fda363658e10b23James Zern 3980a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic void tm_predictor_32x8(uint8_t *dst, const ptrdiff_t stride, 3990a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t l, const uint8x16_t a0, 4000a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a1, const int16x8_t tl) { 4010a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t a0h = unpack_to_s16_h(a0); 4020a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t a0l = unpack_to_s16_l(a0); 4030a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t a1h = unpack_to_s16_h(a1); 4040a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t a1l = unpack_to_s16_l(a1); 4050a39d0a697ff3603e8c100300fda363658e10b23James Zern 4060a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x1(dst, vec_splat(l, 0), a0h, a0l, a1h, a1l, tl); 4070a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 4080a39d0a697ff3603e8c100300fda363658e10b23James Zern 4090a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x1(dst, vec_splat(l, 1), a0h, a0l, a1h, a1l, tl); 4100a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 4110a39d0a697ff3603e8c100300fda363658e10b23James Zern 4120a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x1(dst, vec_splat(l, 2), a0h, a0l, a1h, a1l, tl); 4130a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 4140a39d0a697ff3603e8c100300fda363658e10b23James Zern 4150a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x1(dst, vec_splat(l, 3), a0h, a0l, a1h, a1l, tl); 4160a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 4170a39d0a697ff3603e8c100300fda363658e10b23James Zern 4180a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x1(dst, vec_splat(l, 4), a0h, a0l, a1h, a1l, tl); 4190a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 4200a39d0a697ff3603e8c100300fda363658e10b23James Zern 4210a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x1(dst, vec_splat(l, 5), a0h, a0l, a1h, a1l, tl); 4220a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 4230a39d0a697ff3603e8c100300fda363658e10b23James Zern 4240a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x1(dst, vec_splat(l, 6), a0h, a0l, a1h, a1l, tl); 4250a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 4260a39d0a697ff3603e8c100300fda363658e10b23James Zern 4270a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x1(dst, vec_splat(l, 7), a0h, a0l, a1h, a1l, tl); 4280a39d0a697ff3603e8c100300fda363658e10b23James Zern} 4290a39d0a697ff3603e8c100300fda363658e10b23James Zern 4300a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_tm_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 4310a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 4320a39d0a697ff3603e8c100300fda363658e10b23James Zern const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); 4330a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t l0 = vec_vsx_ld(0, left); 4340a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t l1 = vec_vsx_ld(16, left); 4350a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a0 = vec_vsx_ld(0, above); 4360a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a1 = vec_vsx_ld(16, above); 4370a39d0a697ff3603e8c100300fda363658e10b23James Zern 4380a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x8(dst, stride, unpack_to_s16_h(l0), a0, a1, tl); 4390a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride * 8; 4400a39d0a697ff3603e8c100300fda363658e10b23James Zern 4410a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x8(dst, stride, unpack_to_s16_l(l0), a0, a1, tl); 4420a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride * 8; 4430a39d0a697ff3603e8c100300fda363658e10b23James Zern 4440a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x8(dst, stride, unpack_to_s16_h(l1), a0, a1, tl); 4450a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride * 8; 4460a39d0a697ff3603e8c100300fda363658e10b23James Zern 4470a39d0a697ff3603e8c100300fda363658e10b23James Zern tm_predictor_32x8(dst, stride, unpack_to_s16_l(l1), a0, a1, tl); 4480a39d0a697ff3603e8c100300fda363658e10b23James Zern} 4490a39d0a697ff3603e8c100300fda363658e10b23James Zern 4500a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void dc_fill_predictor_8x8(uint8_t *dst, const ptrdiff_t stride, 4510a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t val) { 4520a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 4530a39d0a697ff3603e8c100300fda363658e10b23James Zern 4540a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 8; i++, dst += stride) { 4550a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d = vec_vsx_ld(0, dst); 4560a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(val, d, 1), 0, dst); 4570a39d0a697ff3603e8c100300fda363658e10b23James Zern } 4580a39d0a697ff3603e8c100300fda363658e10b23James Zern} 4590a39d0a697ff3603e8c100300fda363658e10b23James Zern 4600a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void dc_fill_predictor_16x16(uint8_t *dst, const ptrdiff_t stride, 4610a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t val) { 4620a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 4630a39d0a697ff3603e8c100300fda363658e10b23James Zern 4640a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 16; i++, dst += stride) { 4650a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(val, 0, dst); 4660a39d0a697ff3603e8c100300fda363658e10b23James Zern } 4670a39d0a697ff3603e8c100300fda363658e10b23James Zern} 4680a39d0a697ff3603e8c100300fda363658e10b23James Zern 4690a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_128_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 4700a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 4710a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v128 = vec_sl(vec_splat_u8(1), vec_splat_u8(7)); 4720a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)above; 4730a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 4740a39d0a697ff3603e8c100300fda363658e10b23James Zern 4750a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_16x16(dst, stride, v128); 4760a39d0a697ff3603e8c100300fda363658e10b23James Zern} 4770a39d0a697ff3603e8c100300fda363658e10b23James Zern 4780a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic INLINE void dc_fill_predictor_32x32(uint8_t *dst, const ptrdiff_t stride, 4790a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t val) { 4800a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 4810a39d0a697ff3603e8c100300fda363658e10b23James Zern 4820a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 32; i++, dst += stride) { 4830a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(val, 0, dst); 4840a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(val, 16, dst); 4850a39d0a697ff3603e8c100300fda363658e10b23James Zern } 4860a39d0a697ff3603e8c100300fda363658e10b23James Zern} 4870a39d0a697ff3603e8c100300fda363658e10b23James Zern 4880a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_128_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 4890a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 4900a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v128 = vec_sl(vec_splat_u8(1), vec_splat_u8(7)); 4910a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)above; 4920a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 4930a39d0a697ff3603e8c100300fda363658e10b23James Zern 4940a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_32x32(dst, stride, v128); 4950a39d0a697ff3603e8c100300fda363658e10b23James Zern} 4960a39d0a697ff3603e8c100300fda363658e10b23James Zern 4970a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t avg16(const uint8_t *values) { 4980a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t sum4s = 4990a39d0a697ff3603e8c100300fda363658e10b23James Zern (int32x4_t)vec_sum4s(vec_vsx_ld(0, values), vec_splat_u32(0)); 5000a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, vec_splat_s32(8)); 5010a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(4)); 5020a39d0a697ff3603e8c100300fda363658e10b23James Zern 5030a39d0a697ff3603e8c100300fda363658e10b23James Zern return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 5040a39d0a697ff3603e8c100300fda363658e10b23James Zern 3); 5050a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5060a39d0a697ff3603e8c100300fda363658e10b23James Zern 5070a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_left_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 5080a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, 5090a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *left) { 5100a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)above; 5110a39d0a697ff3603e8c100300fda363658e10b23James Zern 5120a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_16x16(dst, stride, avg16(left)); 5130a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5140a39d0a697ff3603e8c100300fda363658e10b23James Zern 5150a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_top_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 5160a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 5170a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 5180a39d0a697ff3603e8c100300fda363658e10b23James Zern 5190a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_16x16(dst, stride, avg16(above)); 5200a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5210a39d0a697ff3603e8c100300fda363658e10b23James Zern 5220a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t avg32(const uint8_t *values) { 5230a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v0 = vec_vsx_ld(0, values); 5240a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t v1 = vec_vsx_ld(16, values); 5250a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t v16 = vec_sl(vec_splat_s32(1), vec_splat_u32(4)); 5260a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t sum4s = 5270a39d0a697ff3603e8c100300fda363658e10b23James Zern (int32x4_t)vec_sum4s(v0, vec_sum4s(v1, vec_splat_u32(0))); 5280a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v16); 5290a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(5)); 5300a39d0a697ff3603e8c100300fda363658e10b23James Zern 5310a39d0a697ff3603e8c100300fda363658e10b23James Zern return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 5320a39d0a697ff3603e8c100300fda363658e10b23James Zern 3); 5330a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5340a39d0a697ff3603e8c100300fda363658e10b23James Zern 5350a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_left_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 5360a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, 5370a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *left) { 5380a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)above; 5390a39d0a697ff3603e8c100300fda363658e10b23James Zern 5400a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_32x32(dst, stride, avg32(left)); 5410a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5420a39d0a697ff3603e8c100300fda363658e10b23James Zern 5430a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_top_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 5440a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 5450a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 5460a39d0a697ff3603e8c100300fda363658e10b23James Zern 5470a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_32x32(dst, stride, avg32(above)); 5480a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5490a39d0a697ff3603e8c100300fda363658e10b23James Zern 5500a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t dc_avg8(const uint8_t *above, const uint8_t *left) { 5510a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a0 = vec_vsx_ld(0, above); 5520a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t l0 = vec_vsx_ld(0, left); 5530a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t sum4s = 5540a39d0a697ff3603e8c100300fda363658e10b23James Zern (int32x4_t)vec_sum4s(l0, vec_sum4s(a0, vec_splat_u32(0))); 5550a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t sum4s8 = xxpermdi(sum4s, vec_splat_s32(0), 1); 5560a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s8, vec_splat_s32(8)); 5570a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(4)); 5580a39d0a697ff3603e8c100300fda363658e10b23James Zern 5590a39d0a697ff3603e8c100300fda363658e10b23James Zern return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 5600a39d0a697ff3603e8c100300fda363658e10b23James Zern 3); 5610a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5620a39d0a697ff3603e8c100300fda363658e10b23James Zern 5630a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t dc_avg16(const uint8_t *above, const uint8_t *left) { 5640a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a0 = vec_vsx_ld(0, above); 5650a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t l0 = vec_vsx_ld(0, left); 5660a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t v16 = vec_sl(vec_splat_s32(1), vec_splat_u32(4)); 5670a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t sum4s = 5680a39d0a697ff3603e8c100300fda363658e10b23James Zern (int32x4_t)vec_sum4s(l0, vec_sum4s(a0, vec_splat_u32(0))); 5690a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v16); 5700a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(5)); 5710a39d0a697ff3603e8c100300fda363658e10b23James Zern 5720a39d0a697ff3603e8c100300fda363658e10b23James Zern return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 5730a39d0a697ff3603e8c100300fda363658e10b23James Zern 3); 5740a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5750a39d0a697ff3603e8c100300fda363658e10b23James Zern 5760a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, 5770a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 5780a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_8x8(dst, stride, dc_avg8(above, left)); 5790a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5800a39d0a697ff3603e8c100300fda363658e10b23James Zern 5810a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 5820a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 5830a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_16x16(dst, stride, dc_avg16(above, left)); 5840a39d0a697ff3603e8c100300fda363658e10b23James Zern} 5850a39d0a697ff3603e8c100300fda363658e10b23James Zern 5860a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t dc_avg32(const uint8_t *above, const uint8_t *left) { 5870a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a0 = vec_vsx_ld(0, above); 5880a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a1 = vec_vsx_ld(16, above); 5890a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t l0 = vec_vsx_ld(0, left); 5900a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t l1 = vec_vsx_ld(16, left); 5910a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t v32 = vec_sl(vec_splat_s32(1), vec_splat_u32(5)); 5920a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t a_sum = vec_sum4s(a0, vec_sum4s(a1, vec_splat_u32(0))); 5930a39d0a697ff3603e8c100300fda363658e10b23James Zern const int32x4_t sum4s = (int32x4_t)vec_sum4s(l0, vec_sum4s(l1, a_sum)); 5940a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v32); 5950a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(6)); 5960a39d0a697ff3603e8c100300fda363658e10b23James Zern 5970a39d0a697ff3603e8c100300fda363658e10b23James Zern return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 5980a39d0a697ff3603e8c100300fda363658e10b23James Zern 3); 5990a39d0a697ff3603e8c100300fda363658e10b23James Zern} 6000a39d0a697ff3603e8c100300fda363658e10b23James Zern 6010a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_dc_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 6020a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 6030a39d0a697ff3603e8c100300fda363658e10b23James Zern dc_fill_predictor_32x32(dst, stride, dc_avg32(above, left)); 6040a39d0a697ff3603e8c100300fda363658e10b23James Zern} 6050a39d0a697ff3603e8c100300fda363658e10b23James Zern 6060a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic uint8x16_t avg3(const uint8x16_t a, const uint8x16_t b, 6070a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c) { 6080a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t ac = 6090a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_adds(vec_and(a, c), vec_sr(vec_xor(a, c), vec_splat_u8(1))); 6100a39d0a697ff3603e8c100300fda363658e10b23James Zern 6110a39d0a697ff3603e8c100300fda363658e10b23James Zern return vec_avg(ac, b); 6120a39d0a697ff3603e8c100300fda363658e10b23James Zern} 6130a39d0a697ff3603e8c100300fda363658e10b23James Zern 6140a39d0a697ff3603e8c100300fda363658e10b23James Zern// Workaround vec_sld/vec_xxsldi/vec_lsdoi being missing or broken. 6150a39d0a697ff3603e8c100300fda363658e10b23James Zernstatic const uint8x16_t sl1 = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 6160a39d0a697ff3603e8c100300fda363658e10b23James Zern 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10 }; 6170a39d0a697ff3603e8c100300fda363658e10b23James Zern 6180a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d45_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, 6190a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 6200a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t af = vec_vsx_ld(0, above); 6210a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t above_right = vec_splat(af, 7); 6220a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a = xxpermdi(af, above_right, 1); 6230a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t b = vec_perm(a, above_right, sl1); 6240a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c = vec_perm(b, above_right, sl1); 6250a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row = avg3(a, b, c); 6260a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 6270a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 6280a39d0a697ff3603e8c100300fda363658e10b23James Zern 6290a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 8; i++) { 6300a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d = vec_vsx_ld(0, dst); 6310a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(row, d, 1), 0, dst); 6320a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 6330a39d0a697ff3603e8c100300fda363658e10b23James Zern row = vec_perm(row, above_right, sl1); 6340a39d0a697ff3603e8c100300fda363658e10b23James Zern } 6350a39d0a697ff3603e8c100300fda363658e10b23James Zern} 6360a39d0a697ff3603e8c100300fda363658e10b23James Zern 6370a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d45_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 6380a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 6390a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a = vec_vsx_ld(0, above); 6400a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t above_right = vec_splat(a, 15); 6410a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t b = vec_perm(a, above_right, sl1); 6420a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c = vec_perm(b, above_right, sl1); 6430a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row = avg3(a, b, c); 6440a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 6450a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 6460a39d0a697ff3603e8c100300fda363658e10b23James Zern 6470a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 16; i++) { 6480a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row, 0, dst); 6490a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 6500a39d0a697ff3603e8c100300fda363658e10b23James Zern row = vec_perm(row, above_right, sl1); 6510a39d0a697ff3603e8c100300fda363658e10b23James Zern } 6520a39d0a697ff3603e8c100300fda363658e10b23James Zern} 6530a39d0a697ff3603e8c100300fda363658e10b23James Zern 6540a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d45_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 6550a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 6560a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a0 = vec_vsx_ld(0, above); 6570a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a1 = vec_vsx_ld(16, above); 6580a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t above_right = vec_splat(a1, 15); 6590a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t b0 = vec_perm(a0, a1, sl1); 6600a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t b1 = vec_perm(a1, above_right, sl1); 6610a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c0 = vec_perm(b0, b1, sl1); 6620a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c1 = vec_perm(b1, above_right, sl1); 6630a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row0 = avg3(a0, b0, c0); 6640a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row1 = avg3(a1, b1, c1); 6650a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 6660a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 6670a39d0a697ff3603e8c100300fda363658e10b23James Zern 6680a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 32; i++) { 6690a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row0, 0, dst); 6700a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row1, 16, dst); 6710a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride; 6720a39d0a697ff3603e8c100300fda363658e10b23James Zern row0 = vec_perm(row0, row1, sl1); 6730a39d0a697ff3603e8c100300fda363658e10b23James Zern row1 = vec_perm(row1, above_right, sl1); 6740a39d0a697ff3603e8c100300fda363658e10b23James Zern } 6750a39d0a697ff3603e8c100300fda363658e10b23James Zern} 6760a39d0a697ff3603e8c100300fda363658e10b23James Zern 6770a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d63_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, 6780a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 6790a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t af = vec_vsx_ld(0, above); 6800a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t above_right = vec_splat(af, 9); 6810a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a = xxpermdi(af, above_right, 1); 6820a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t b = vec_perm(a, above_right, sl1); 6830a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c = vec_perm(b, above_right, sl1); 6840a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row0 = vec_avg(a, b); 6850a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row1 = avg3(a, b, c); 6860a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 6870a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 6880a39d0a697ff3603e8c100300fda363658e10b23James Zern 6890a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 4; i++) { 6900a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d0 = vec_vsx_ld(0, dst); 6910a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t d1 = vec_vsx_ld(0, dst + stride); 6920a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(row0, d0, 1), 0, dst); 6930a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(xxpermdi(row1, d1, 1), 0, dst + stride); 6940a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride * 2; 6950a39d0a697ff3603e8c100300fda363658e10b23James Zern row0 = vec_perm(row0, above_right, sl1); 6960a39d0a697ff3603e8c100300fda363658e10b23James Zern row1 = vec_perm(row1, above_right, sl1); 6970a39d0a697ff3603e8c100300fda363658e10b23James Zern } 6980a39d0a697ff3603e8c100300fda363658e10b23James Zern} 6990a39d0a697ff3603e8c100300fda363658e10b23James Zern 7000a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d63_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, 7010a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 7020a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a0 = vec_vsx_ld(0, above); 7030a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a1 = vec_vsx_ld(16, above); 7040a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t above_right = vec_splat(a1, 0); 7050a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t b = vec_perm(a0, above_right, sl1); 7060a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c = vec_perm(b, above_right, sl1); 7070a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row0 = vec_avg(a0, b); 7080a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row1 = avg3(a0, b, c); 7090a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 7100a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 7110a39d0a697ff3603e8c100300fda363658e10b23James Zern 7120a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 8; i++) { 7130a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row0, 0, dst); 7140a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row1, 0, dst + stride); 7150a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride * 2; 7160a39d0a697ff3603e8c100300fda363658e10b23James Zern row0 = vec_perm(row0, above_right, sl1); 7170a39d0a697ff3603e8c100300fda363658e10b23James Zern row1 = vec_perm(row1, above_right, sl1); 7180a39d0a697ff3603e8c100300fda363658e10b23James Zern } 7190a39d0a697ff3603e8c100300fda363658e10b23James Zern} 7200a39d0a697ff3603e8c100300fda363658e10b23James Zern 7210a39d0a697ff3603e8c100300fda363658e10b23James Zernvoid vpx_d63_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, 7220a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8_t *above, const uint8_t *left) { 7230a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a0 = vec_vsx_ld(0, above); 7240a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a1 = vec_vsx_ld(16, above); 7250a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t a2 = vec_vsx_ld(32, above); 7260a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t above_right = vec_splat(a2, 0); 7270a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t b0 = vec_perm(a0, a1, sl1); 7280a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t b1 = vec_perm(a1, above_right, sl1); 7290a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c0 = vec_perm(b0, b1, sl1); 7300a39d0a697ff3603e8c100300fda363658e10b23James Zern const uint8x16_t c1 = vec_perm(b1, above_right, sl1); 7310a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row0_0 = vec_avg(a0, b0); 7320a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row0_1 = vec_avg(a1, b1); 7330a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row1_0 = avg3(a0, b0, c0); 7340a39d0a697ff3603e8c100300fda363658e10b23James Zern uint8x16_t row1_1 = avg3(a1, b1, c1); 7350a39d0a697ff3603e8c100300fda363658e10b23James Zern int i; 7360a39d0a697ff3603e8c100300fda363658e10b23James Zern (void)left; 7370a39d0a697ff3603e8c100300fda363658e10b23James Zern 7380a39d0a697ff3603e8c100300fda363658e10b23James Zern for (i = 0; i < 16; i++) { 7390a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row0_0, 0, dst); 7400a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row0_1, 16, dst); 7410a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row1_0, 0, dst + stride); 7420a39d0a697ff3603e8c100300fda363658e10b23James Zern vec_vsx_st(row1_1, 16, dst + stride); 7430a39d0a697ff3603e8c100300fda363658e10b23James Zern dst += stride * 2; 7440a39d0a697ff3603e8c100300fda363658e10b23James Zern row0_0 = vec_perm(row0_0, row0_1, sl1); 7450a39d0a697ff3603e8c100300fda363658e10b23James Zern row0_1 = vec_perm(row0_1, above_right, sl1); 7460a39d0a697ff3603e8c100300fda363658e10b23James Zern row1_0 = vec_perm(row1_0, row1_1, sl1); 7470a39d0a697ff3603e8c100300fda363658e10b23James Zern row1_1 = vec_perm(row1_1, above_right, sl1); 7480a39d0a697ff3603e8c100300fda363658e10b23James Zern } 7490a39d0a697ff3603e8c100300fda363658e10b23James Zern} 750