1/* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <assert.h> 12#include <stdio.h> 13 14#include "./vpx_config.h" 15#include "./vp9_rtcd.h" 16#include "vp9/common/vp9_common.h" 17#include "vp9/common/vp9_blockd.h" 18#include "vp9/common/vp9_idct.h" 19#include "vpx_dsp/mips/inv_txfm_dspr2.h" 20#include "vpx_dsp/txfm_common.h" 21#include "vpx_ports/mem.h" 22 23#if HAVE_DSPR2 24void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int pitch, 25 int tx_type) { 26 int i, j; 27 DECLARE_ALIGNED(32, int16_t, out[16 * 16]); 28 int16_t *outptr = out; 29 int16_t temp_out[16]; 30 uint32_t pos = 45; 31 32 /* bit positon for extract from acc */ 33 __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); 34 35 switch (tx_type) { 36 case DCT_DCT: // DCT in both horizontal and vertical 37 idct16_rows_dspr2(input, outptr, 16); 38 idct16_cols_add_blk_dspr2(out, dest, pitch); 39 break; 40 case ADST_DCT: // ADST in vertical, DCT in horizontal 41 idct16_rows_dspr2(input, outptr, 16); 42 43 outptr = out; 44 45 for (i = 0; i < 16; ++i) { 46 iadst16_dspr2(outptr, temp_out); 47 48 for (j = 0; j < 16; ++j) 49 dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + 50 dest[j * pitch + i]); 51 outptr += 16; 52 } 53 break; 54 case DCT_ADST: // DCT in vertical, ADST in horizontal 55 { 56 int16_t temp_in[16 * 16]; 57 58 for (i = 0; i < 16; ++i) { 59 /* prefetch row */ 60 prefetch_load((const uint8_t *)(input + 16)); 61 62 iadst16_dspr2(input, outptr); 63 input += 16; 64 outptr += 16; 65 } 66 67 for (i = 0; i < 16; ++i) 68 for (j = 0; j < 16; ++j) temp_in[j * 16 + i] = out[i * 16 + j]; 69 70 idct16_cols_add_blk_dspr2(temp_in, dest, pitch); 71 break; 72 } 73 case ADST_ADST: // ADST in both directions 74 { 75 int16_t temp_in[16]; 76 77 for (i = 0; i < 16; ++i) { 78 /* prefetch row */ 79 prefetch_load((const uint8_t *)(input + 16)); 80 81 iadst16_dspr2(input, outptr); 82 input += 16; 83 outptr += 16; 84 } 85 86 for (i = 0; i < 16; ++i) { 87 for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; 88 iadst16_dspr2(temp_in, temp_out); 89 for (j = 0; j < 16; ++j) 90 dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + 91 dest[j * pitch + i]); 92 } 93 break; 94 } 95 default: printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n"); break; 96 } 97} 98#endif // #if HAVE_DSPR2 99