1df37111358d02836cb29bbcb9c6e4c95dff90a16Johann/* 2df37111358d02836cb29bbcb9c6e4c95dff90a16Johann * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 3df37111358d02836cb29bbcb9c6e4c95dff90a16Johann * 4df37111358d02836cb29bbcb9c6e4c95dff90a16Johann * Use of this source code is governed by a BSD-style license 5df37111358d02836cb29bbcb9c6e4c95dff90a16Johann * that can be found in the LICENSE file in the root of the source 6df37111358d02836cb29bbcb9c6e4c95dff90a16Johann * tree. An additional intellectual property rights grant can be found 7df37111358d02836cb29bbcb9c6e4c95dff90a16Johann * in the file PATENTS. All contributing project authors may 8df37111358d02836cb29bbcb9c6e4c95dff90a16Johann * be found in the AUTHORS file in the root of the source tree. 9df37111358d02836cb29bbcb9c6e4c95dff90a16Johann */ 10df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 11df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#include "./vp8_rtcd.h" 12df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#include "vpx_ports/mem.h" 13df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#include "vpx_ports/asmdefs_mmi.h" 14df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 15df37111358d02836cb29bbcb9c6e4c95dff90a16Johann/* clang-format off */ 16df37111358d02836cb29bbcb9c6e4c95dff90a16Johann/* TRANSPOSE_4H: transpose 4x4 matrix. 17df37111358d02836cb29bbcb9c6e4c95dff90a16Johann Input: ftmp1,ftmp2,ftmp3,ftmp4 18df37111358d02836cb29bbcb9c6e4c95dff90a16Johann Output: ftmp1,ftmp2,ftmp3,ftmp4 19df37111358d02836cb29bbcb9c6e4c95dff90a16Johann Note: ftmp0 always be 0, ftmp5~9 used for temporary value. 20df37111358d02836cb29bbcb9c6e4c95dff90a16Johann */ 21df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#define TRANSPOSE_4H \ 22df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_LI(%[tmp0], 0x93) \ 23df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "mtc1 %[tmp0], %[ftmp10] \n\t" \ 24df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ 25df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ 26df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ 27df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "or %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \ 28df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \ 29df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ 30df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ 31df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "or %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \ 32df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ 33df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ 34df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ 35df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "or %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \ 36df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ 37df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ 38df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ 39df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "or %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ 40df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \ 41df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \ 42df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \ 43df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhwd %[ftmp4], %[ftmp6], %[ftmp8] \n\t" 44df37111358d02836cb29bbcb9c6e4c95dff90a16Johann/* clang-format on */ 45df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 46df37111358d02836cb29bbcb9c6e4c95dff90a16Johannvoid vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) { 47df37111358d02836cb29bbcb9c6e4c95dff90a16Johann uint64_t tmp[1]; 48df37111358d02836cb29bbcb9c6e4c95dff90a16Johann int16_t *ip = input; 49df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 50df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#if _MIPS_SIM == _ABIO32 51df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp0 asm("$f0"); 52df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp1 asm("$f2"); 53df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp2 asm("$f4"); 54df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp3 asm("$f6"); 55df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp4 asm("$f8"); 56df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp5 asm("$f10"); 57df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp6 asm("$f12"); 58df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp7 asm("$f14"); 59df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp8 asm("$f16"); 60df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp9 asm("$f18"); 61df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp10 asm("$f20"); 62df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp11 asm("$f22"); 63df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp12 asm("$f24"); 64df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#else 65df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp0 asm("$f0"); 66df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp1 asm("$f1"); 67df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp2 asm("$f2"); 68df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp3 asm("$f3"); 69df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp4 asm("$f4"); 70df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp5 asm("$f5"); 71df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp6 asm("$f6"); 72df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp7 asm("$f7"); 73df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp8 asm("$f8"); 74df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp9 asm("$f9"); 75df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp10 asm("$f10"); 76df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp11 asm("$f11"); 77df37111358d02836cb29bbcb9c6e4c95dff90a16Johann register double ftmp12 asm("$f12"); 78df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#endif // _MIPS_SIM == _ABIO32 79df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 80df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_ph_01) = { 0x0001000100010001ULL }; 81df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_ph_07) = { 0x0007000700070007ULL }; 82df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_12000) = { 0x00002ee000002ee0ULL }; 83df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_51000) = { 0x0000c7380000c738ULL }; 84df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_14500) = { 0x000038a4000038a4ULL }; 85df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_7500) = { 0x00001d4c00001d4cULL }; 86df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_ph_op1) = { 0x14e808a914e808a9ULL }; 87df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_ph_op3) = { 0xeb1808a9eb1808a9ULL }; 88df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_5352) = { 0x000014e8000014e8ULL }; 89df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_2217) = { 0x000008a9000008a9ULL }; 90df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_ph_8) = { 0x0008000800080008ULL }; 91df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 92df37111358d02836cb29bbcb9c6e4c95dff90a16Johann __asm__ volatile ( 93df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 94df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" 95df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" 96df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_ADDU(%[ip], %[ip], %[pitch]) 97df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldlc1 %[ftmp2], 0x07(%[ip]) \n\t" 98df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldrc1 %[ftmp2], 0x00(%[ip]) \n\t" 99df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_ADDU(%[ip], %[ip], %[pitch]) 100df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldlc1 %[ftmp3], 0x07(%[ip]) \n\t" 101df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldrc1 %[ftmp3], 0x00(%[ip]) \n\t" 102df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_ADDU(%[ip], %[ip], %[pitch]) 103df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldlc1 %[ftmp4], 0x07(%[ip]) \n\t" 104df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldrc1 %[ftmp4], 0x00(%[ip]) \n\t" 105df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_ADDU(%[ip], %[ip], %[pitch]) 106df37111358d02836cb29bbcb9c6e4c95dff90a16Johann TRANSPOSE_4H 107df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 108df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "ldc1 %[ftmp11], %[ff_ph_8] \n\t" 109df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // f1 + f4 110df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp5], %[ftmp1], %[ftmp4] \n\t" 111df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a1 112df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmullh %[ftmp5], %[ftmp5], %[ftmp11] \n\t" 113df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // f2 + f3 114df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 115df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b1 116df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmullh %[ftmp6], %[ftmp6], %[ftmp11] \n\t" 117df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // f2 - f3 118df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp7], %[ftmp2], %[ftmp3] \n\t" 119df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // c1 120df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmullh %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 121df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // f1 - f4 122df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp8], %[ftmp1], %[ftmp4] \n\t" 123df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // d1 124df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmullh %[ftmp8], %[ftmp8], %[ftmp11] \n\t" 125df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[0] = a1 + b1 126df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" 127df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[2] = a1 - b1 128df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp3], %[ftmp5], %[ftmp6] \n\t" 129df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 130df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12 131df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_LI(%[tmp0], 0x0c) 132df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "mtc1 %[tmp0], %[ftmp11] \n\t" 133df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "ldc1 %[ftmp12], %[ff_pw_14500] \n\t" 134df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 135df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp5], %[ftmp9], %[ff_ph_op1] \n\t" 136df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" 137df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp6], %[ftmp9], %[ff_ph_op1] \n\t" 138df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp5], %[ftmp5], %[ftmp12] \n\t" 139df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp6], %[ftmp6], %[ftmp12] \n\t" 140df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp5], %[ftmp5], %[ftmp11] \n\t" 141df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp6], %[ftmp6], %[ftmp11] \n\t" 142df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "packsswh %[ftmp2], %[ftmp5], %[ftmp6] \n\t" 143df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 144df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[3] = (d1 * 2217 - c1 * 5352 + 7500) >> 12 145df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "ldc1 %[ftmp12], %[ff_pw_7500] \n\t" 146df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklhw %[ftmp9], %[ftmp8], %[ftmp7] \n\t" 147df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp5], %[ftmp9], %[ff_ph_op3] \n\t" 148df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhhw %[ftmp9], %[ftmp8], %[ftmp7] \n\t" 149df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp6], %[ftmp9], %[ff_ph_op3] \n\t" 150df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp5], %[ftmp5], %[ftmp12] \n\t" 151df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp6], %[ftmp6], %[ftmp12] \n\t" 152df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp5], %[ftmp5], %[ftmp11] \n\t" 153df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp6], %[ftmp6], %[ftmp11] \n\t" 154df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "packsswh %[ftmp4], %[ftmp5], %[ftmp6] \n\t" 155df37111358d02836cb29bbcb9c6e4c95dff90a16Johann TRANSPOSE_4H 156df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 157df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp5], %[ftmp1], %[ftmp4] \n\t" 158df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" 159df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp7], %[ftmp2], %[ftmp3] \n\t" 160df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp8], %[ftmp1], %[ftmp4] \n\t" 161df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 162df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpeqh %[ftmp0], %[ftmp8], %[ftmp0] \n\t" 163df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "ldc1 %[ftmp9], %[ff_ph_01] \n\t" 164df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp0], %[ftmp0], %[ftmp9] \n\t" 165df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 166df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" 167df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp2], %[ftmp5], %[ftmp6] \n\t" 168df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "ldc1 %[ftmp9], %[ff_ph_07] \n\t" 169df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 170df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" 171df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_LI(%[tmp0], 0x04) 172df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "mtc1 %[tmp0], %[ftmp9] \n\t" 173df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 174df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t" 175df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 176df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_LI(%[tmp0], 0x10) 177df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "mtc1 %[tmp0], %[ftmp9] \n\t" 178df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "ldc1 %[ftmp12], %[ff_pw_12000] \n\t" 179df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklhw %[ftmp5], %[ftmp7], %[ftmp8] \n\t" 180df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp10], %[ftmp5], %[ff_ph_op1] \n\t" 181df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhhw %[ftmp5], %[ftmp7], %[ftmp8] \n\t" 182df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp11], %[ftmp5], %[ff_ph_op1] \n\t" 183df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 184df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" 185df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp10], %[ftmp10], %[ftmp9] \n\t" 186df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp11], %[ftmp11], %[ftmp9] \n\t" 187df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "packsswh %[ftmp3], %[ftmp10], %[ftmp11] \n\t" 188df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" 189df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 190df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "ldc1 %[ftmp12], %[ff_pw_51000] \n\t" 191df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpcklhw %[ftmp5], %[ftmp8], %[ftmp7] \n\t" 192df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp10], %[ftmp5], %[ff_ph_op3] \n\t" 193df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "punpckhhw %[ftmp5], %[ftmp8], %[ftmp7] \n\t" 194df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp11], %[ftmp5], %[ff_ph_op3] \n\t" 195df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp10], %[ftmp10], %[ftmp12] \n\t" 196df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" 197df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp10], %[ftmp10], %[ftmp9] \n\t" 198df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp11], %[ftmp11], %[ftmp9] \n\t" 199df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "packsswh %[ftmp4], %[ftmp10], %[ftmp11] \n\t" 200df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 201df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdlc1 %[ftmp1], 0x07(%[output]) \n\t" 202df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdrc1 %[ftmp1], 0x00(%[output]) \n\t" 203df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdlc1 %[ftmp3], 0x0f(%[output]) \n\t" 204df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdrc1 %[ftmp3], 0x08(%[output]) \n\t" 205df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdlc1 %[ftmp2], 0x17(%[output]) \n\t" 206df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdrc1 %[ftmp2], 0x10(%[output]) \n\t" 207df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdlc1 %[ftmp4], 0x1f(%[output]) \n\t" 208df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdrc1 %[ftmp4], 0x18(%[output]) \n\t" 209df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 210df37111358d02836cb29bbcb9c6e4c95dff90a16Johann : [ftmp0] "=&f"(ftmp0), [ftmp1] "=&f"(ftmp1), [ftmp2] "=&f"(ftmp2), 211df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp3] "=&f"(ftmp3), [ftmp4] "=&f"(ftmp4), [ftmp5] "=&f"(ftmp5), 212df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp6] "=&f"(ftmp6), [ftmp7] "=&f"(ftmp7), [ftmp8] "=&f"(ftmp8), 213df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp9] "=&f"(ftmp9), [ftmp10] "=&f"(ftmp10), [ftmp11] "=&f"(ftmp11), 214df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp12] "=&f"(ftmp12), [tmp0] "=&r"(tmp[0]), [ip]"+&r"(ip) 215df37111358d02836cb29bbcb9c6e4c95dff90a16Johann : [ff_ph_01] "m"(ff_ph_01), [ff_ph_07] "m"(ff_ph_07), 216df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ff_ph_op1] "f"(ff_ph_op1), [ff_ph_op3] "f"(ff_ph_op3), 217df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ff_pw_14500] "m"(ff_pw_14500), [ff_pw_7500] "m"(ff_pw_7500), 218df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ff_pw_12000] "m"(ff_pw_12000), [ff_pw_51000] "m"(ff_pw_51000), 219df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ff_pw_5352]"m"(ff_pw_5352), [ff_pw_2217]"m"(ff_pw_2217), 220df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ff_ph_8]"m"(ff_ph_8), [pitch]"r"(pitch), [output] "r"(output) 221df37111358d02836cb29bbcb9c6e4c95dff90a16Johann : "memory" 222df37111358d02836cb29bbcb9c6e4c95dff90a16Johann ); 223df37111358d02836cb29bbcb9c6e4c95dff90a16Johann} 224df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 225df37111358d02836cb29bbcb9c6e4c95dff90a16Johannvoid vp8_short_fdct8x4_mmi(int16_t *input, int16_t *output, int pitch) { 226df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vp8_short_fdct4x4_mmi(input, output, pitch); 227df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vp8_short_fdct4x4_mmi(input + 4, output + 16, pitch); 228df37111358d02836cb29bbcb9c6e4c95dff90a16Johann} 229df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 230df37111358d02836cb29bbcb9c6e4c95dff90a16Johannvoid vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) { 231df37111358d02836cb29bbcb9c6e4c95dff90a16Johann double ftmp[13]; 232df37111358d02836cb29bbcb9c6e4c95dff90a16Johann uint32_t tmp[1]; 233df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_ph_01) = { 0x0001000100010001ULL }; 234df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_01) = { 0x0000000100000001ULL }; 235df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_03) = { 0x0000000300000003ULL }; 236df37111358d02836cb29bbcb9c6e4c95dff90a16Johann DECLARE_ALIGNED(8, const uint64_t, ff_pw_mask) = { 0x0001000000010000ULL }; 237df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 238df37111358d02836cb29bbcb9c6e4c95dff90a16Johann __asm__ volatile ( 239df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_LI(%[tmp0], 0x02) 240df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" 241df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "mtc1 %[tmp0], %[ftmp11] \n\t" 242df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 243df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" 244df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" 245df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_ADDU(%[ip], %[ip], %[pitch]) 246df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldlc1 %[ftmp2], 0x07(%[ip]) \n\t" 247df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldrc1 %[ftmp2], 0x00(%[ip]) \n\t" 248df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_ADDU(%[ip], %[ip], %[pitch]) 249df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldlc1 %[ftmp3], 0x07(%[ip]) \n\t" 250df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldrc1 %[ftmp3], 0x00(%[ip]) \n\t" 251df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_ADDU(%[ip], %[ip], %[pitch]) 252df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldlc1 %[ftmp4], 0x07(%[ip]) \n\t" 253df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gsldrc1 %[ftmp4], 0x00(%[ip]) \n\t" 254df37111358d02836cb29bbcb9c6e4c95dff90a16Johann TRANSPOSE_4H 255df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 256df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psllh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 257df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psllh %[ftmp2], %[ftmp2], %[ftmp11] \n\t" 258df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psllh %[ftmp3], %[ftmp3], %[ftmp11] \n\t" 259df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psllh %[ftmp4], %[ftmp4], %[ftmp11] \n\t" 260df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a 261df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t" 262df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // d 263df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp6], %[ftmp2], %[ftmp4] \n\t" 264df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // c 265df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp7], %[ftmp2], %[ftmp4] \n\t" 266df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b 267df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp8], %[ftmp1], %[ftmp3] \n\t" 268df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 269df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a + d 270df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" 271df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b + c 272df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp2], %[ftmp8], %[ftmp7] \n\t" 273df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b - c 274df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp3], %[ftmp8], %[ftmp7] \n\t" 275df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a - d 276df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubh %[ftmp4], %[ftmp5], %[ftmp6] \n\t" 277df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 278df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpeqh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" 279df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp6], %[ftmp6], %[ff_ph_01] \n\t" 280df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t" 281df37111358d02836cb29bbcb9c6e4c95dff90a16Johann TRANSPOSE_4H 282df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 283df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[2], op[0] 284df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp5], %[ftmp1], %[ff_pw_01] \n\t" 285df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[3], op[1] 286df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp1], %[ftmp1], %[ff_pw_mask] \n\t" 287df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 288df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[6], op[4] 289df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp6], %[ftmp2], %[ff_pw_01] \n\t" 290df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[7], op[5] 291df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp2], %[ftmp2], %[ff_pw_mask] \n\t" 292df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 293df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[10], op[8] 294df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp7], %[ftmp3], %[ff_pw_01] \n\t" 295df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[11], op[9] 296df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp3], %[ftmp3], %[ff_pw_mask] \n\t" 297df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 298df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[14], op[12] 299df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp8], %[ftmp4], %[ff_pw_01] \n\t" 300df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // op[15], op[13] 301df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pmaddhw %[ftmp4], %[ftmp4], %[ff_pw_mask] \n\t" 302df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 303df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a1, a3 304df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp9], %[ftmp5], %[ftmp7] \n\t" 305df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // d1, d3 306df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp10], %[ftmp6], %[ftmp8] \n\t" 307df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // c1, c3 308df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubw %[ftmp11], %[ftmp6], %[ftmp8] \n\t" 309df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b1, b3 310df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubw %[ftmp12], %[ftmp5], %[ftmp7] \n\t" 311df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 312df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a1 + d1, a3 + d3 313df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp5], %[ftmp9], %[ftmp10] \n\t" 314df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b1 + c1, b3 + c3 315df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp6], %[ftmp12], %[ftmp11] \n\t" 316df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b1 - c1, b3 - c3 317df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubw %[ftmp7], %[ftmp12], %[ftmp11] \n\t" 318df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a1 - d1, a3 - d3 319df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubw %[ftmp8], %[ftmp9], %[ftmp10] \n\t" 320df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 321df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a2, a4 322df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp9], %[ftmp1], %[ftmp3] \n\t" 323df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // d2, d4 324df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp10], %[ftmp2], %[ftmp4] \n\t" 325df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // c2, c4 326df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubw %[ftmp11], %[ftmp2], %[ftmp4] \n\t" 327df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b2, b4 328df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubw %[ftmp12], %[ftmp1], %[ftmp3] \n\t" 329df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 330df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a2 + d2, a4 + d4 331df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" 332df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b2 + c2, b4 + c4 333df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp2], %[ftmp12], %[ftmp11] \n\t" 334df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // b2 - c2, b4 - c4 335df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubw %[ftmp3], %[ftmp12], %[ftmp11] \n\t" 336df37111358d02836cb29bbcb9c6e4c95dff90a16Johann // a2 - d2, a4 - d4 337df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psubw %[ftmp4], %[ftmp9], %[ftmp10] \n\t" 338df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 339df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_LI(%[tmp0], 0x03) 340df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "mtc1 %[tmp0], %[ftmp11] \n\t" 341df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 342df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp1] \n\t" 343df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" 344df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp1], %[ftmp1], %[ftmp9] \n\t" 345df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp1], %[ftmp1], %[ff_pw_03] \n\t" 346df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 347df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 348df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp2] \n\t" 349df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" 350df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp2], %[ftmp2], %[ftmp9] \n\t" 351df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp2], %[ftmp2], %[ff_pw_03] \n\t" 352df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp2], %[ftmp2], %[ftmp11] \n\t" 353df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 354df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp3] \n\t" 355df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" 356df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp3], %[ftmp3], %[ftmp9] \n\t" 357df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp3], %[ftmp3], %[ff_pw_03] \n\t" 358df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp3], %[ftmp3], %[ftmp11] \n\t" 359df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 360df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp4] \n\t" 361df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" 362df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp4], %[ftmp4], %[ftmp9] \n\t" 363df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp4], %[ftmp4], %[ff_pw_03] \n\t" 364df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp4], %[ftmp4], %[ftmp11] \n\t" 365df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 366df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp5] \n\t" 367df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" 368df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp5], %[ftmp5], %[ftmp9] \n\t" 369df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp5], %[ftmp5], %[ff_pw_03] \n\t" 370df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp5], %[ftmp5], %[ftmp11] \n\t" 371df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 372df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp6] \n\t" 373df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" 374df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp6], %[ftmp6], %[ftmp9] \n\t" 375df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp6], %[ftmp6], %[ff_pw_03] \n\t" 376df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp6], %[ftmp6], %[ftmp11] \n\t" 377df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 378df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp7] \n\t" 379df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" 380df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp7], %[ftmp7], %[ftmp9] \n\t" 381df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp7], %[ftmp7], %[ff_pw_03] \n\t" 382df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp7], %[ftmp7], %[ftmp11] \n\t" 383df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 384df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp8] \n\t" 385df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" 386df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" 387df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "paddw %[ftmp8], %[ftmp8], %[ff_pw_03] \n\t" 388df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "psraw %[ftmp8], %[ftmp8], %[ftmp11] \n\t" 389df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 390df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "packsswh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" 391df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "packsswh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" 392df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "packsswh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" 393df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "packsswh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" 394df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 395df37111358d02836cb29bbcb9c6e4c95dff90a16Johann MMI_LI(%[tmp0], 0x72) 396df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "mtc1 %[tmp0], %[ftmp11] \n\t" 397df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pshufh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" 398df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pshufh %[ftmp2], %[ftmp2], %[ftmp11] \n\t" 399df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pshufh %[ftmp3], %[ftmp3], %[ftmp11] \n\t" 400df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "pshufh %[ftmp4], %[ftmp4], %[ftmp11] \n\t" 401df37111358d02836cb29bbcb9c6e4c95dff90a16Johann 402df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdlc1 %[ftmp1], 0x07(%[op]) \n\t" 403df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdrc1 %[ftmp1], 0x00(%[op]) \n\t" 404df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdlc1 %[ftmp2], 0x0f(%[op]) \n\t" 405df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdrc1 %[ftmp2], 0x08(%[op]) \n\t" 406df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdlc1 %[ftmp3], 0x17(%[op]) \n\t" 407df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdrc1 %[ftmp3], 0x10(%[op]) \n\t" 408df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdlc1 %[ftmp4], 0x1f(%[op]) \n\t" 409df37111358d02836cb29bbcb9c6e4c95dff90a16Johann "gssdrc1 %[ftmp4], 0x18(%[op]) \n\t" 410df37111358d02836cb29bbcb9c6e4c95dff90a16Johann : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 411df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), 412df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), 413df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), 414df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), 415df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), 416df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ftmp12]"=&f"(ftmp[12]), 417df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [tmp0]"=&r"(tmp[0]), 418df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ip]"+&r"(input) 419df37111358d02836cb29bbcb9c6e4c95dff90a16Johann : [op]"r"(output), 420df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ff_pw_01]"f"(ff_pw_01), [pitch]"r"((mips_reg)pitch), 421df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ff_pw_03]"f"(ff_pw_03), [ff_pw_mask]"f"(ff_pw_mask), 422df37111358d02836cb29bbcb9c6e4c95dff90a16Johann [ff_ph_01]"f"(ff_ph_01) 423df37111358d02836cb29bbcb9c6e4c95dff90a16Johann : "memory" 424df37111358d02836cb29bbcb9c6e4c95dff90a16Johann ); 425df37111358d02836cb29bbcb9c6e4c95dff90a16Johann} 426