190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/* 2f71323e297a928af368937089d3ed71239786f86Andreas Huber * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 4f71323e297a928af368937089d3ed71239786f86Andreas Huber * Use of this source code is governed by a BSD-style license 5f71323e297a928af368937089d3ed71239786f86Andreas Huber * that can be found in the LICENSE file in the root of the source 6f71323e297a928af368937089d3ed71239786f86Andreas Huber * tree. An additional intellectual property rights grant can be found 7f71323e297a928af368937089d3ed71239786f86Andreas Huber * in the file PATENTS. All contributing project authors may 8f71323e297a928af368937089d3ed71239786f86Andreas Huber * be found in the AUTHORS file in the root of the source tree. 990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber */ 1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vp8_rtcd.h" 1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber/**************************************************************************** 1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * Notes: 1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * This implementation makes use of 16 bit fixed point verio of two multiply 1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * constants: 1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 1. sqrt(2) * cos (pi/8) 1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * 2. sqrt(2) * sin (pi/8) 2090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * Becuase the first constant is bigger than 1, to maintain the same 16 bit 2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * fixed point precision as the second one, we use a trick of 2290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * x * a = x + x*(a-1) 2390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * so 2490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). 2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber **************************************************************************/ 2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic const int cospi8sqrt2minus1 = 20091; 277bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic const int sinpi8sqrt2 = 35468; 281b362b15af34006e6a11974088a46d42b903418eJohann 291b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr, 301b362b15af34006e6a11974088a46d42b903418eJohann int pred_stride, unsigned char *dst_ptr, 317bc9febe8749e98a3812a0dc4380ceae75c29450Johann int dst_stride) { 327bc9febe8749e98a3812a0dc4380ceae75c29450Johann int i; 337bc9febe8749e98a3812a0dc4380ceae75c29450Johann int r, c; 347bc9febe8749e98a3812a0dc4380ceae75c29450Johann int a1, b1, c1, d1; 357bc9febe8749e98a3812a0dc4380ceae75c29450Johann short output[16]; 367bc9febe8749e98a3812a0dc4380ceae75c29450Johann short *ip = input; 377bc9febe8749e98a3812a0dc4380ceae75c29450Johann short *op = output; 387bc9febe8749e98a3812a0dc4380ceae75c29450Johann int temp1, temp2; 397bc9febe8749e98a3812a0dc4380ceae75c29450Johann int shortpitch = 4; 4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 417bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 4; ++i) { 427bc9febe8749e98a3812a0dc4380ceae75c29450Johann a1 = ip[0] + ip[8]; 437bc9febe8749e98a3812a0dc4380ceae75c29450Johann b1 = ip[0] - ip[8]; 4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 457bc9febe8749e98a3812a0dc4380ceae75c29450Johann temp1 = (ip[4] * sinpi8sqrt2) >> 16; 467bc9febe8749e98a3812a0dc4380ceae75c29450Johann temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); 477bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = temp1 - temp2; 4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 497bc9febe8749e98a3812a0dc4380ceae75c29450Johann temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); 507bc9febe8749e98a3812a0dc4380ceae75c29450Johann temp2 = (ip[12] * sinpi8sqrt2) >> 16; 517bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1 = temp1 + temp2; 5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 537bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[shortpitch * 0] = a1 + d1; 547bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[shortpitch * 3] = a1 - d1; 5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 567bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[shortpitch * 1] = b1 + c1; 577bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[shortpitch * 2] = b1 - c1; 5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 597bc9febe8749e98a3812a0dc4380ceae75c29450Johann ip++; 607bc9febe8749e98a3812a0dc4380ceae75c29450Johann op++; 617bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 637bc9febe8749e98a3812a0dc4380ceae75c29450Johann ip = output; 647bc9febe8749e98a3812a0dc4380ceae75c29450Johann op = output; 6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 667bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 4; ++i) { 677bc9febe8749e98a3812a0dc4380ceae75c29450Johann a1 = ip[0] + ip[2]; 687bc9febe8749e98a3812a0dc4380ceae75c29450Johann b1 = ip[0] - ip[2]; 691b362b15af34006e6a11974088a46d42b903418eJohann 707bc9febe8749e98a3812a0dc4380ceae75c29450Johann temp1 = (ip[1] * sinpi8sqrt2) >> 16; 717bc9febe8749e98a3812a0dc4380ceae75c29450Johann temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); 727bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = temp1 - temp2; 731b362b15af34006e6a11974088a46d42b903418eJohann 747bc9febe8749e98a3812a0dc4380ceae75c29450Johann temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); 757bc9febe8749e98a3812a0dc4380ceae75c29450Johann temp2 = (ip[3] * sinpi8sqrt2) >> 16; 767bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1 = temp1 + temp2; 771b362b15af34006e6a11974088a46d42b903418eJohann 787bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[0] = (a1 + d1 + 4) >> 3; 797bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[3] = (a1 - d1 + 4) >> 3; 8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 817bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[1] = (b1 + c1 + 4) >> 3; 827bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[2] = (b1 - c1 + 4) >> 3; 8390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 847bc9febe8749e98a3812a0dc4380ceae75c29450Johann ip += shortpitch; 857bc9febe8749e98a3812a0dc4380ceae75c29450Johann op += shortpitch; 867bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 87f71323e297a928af368937089d3ed71239786f86Andreas Huber 887bc9febe8749e98a3812a0dc4380ceae75c29450Johann ip = output; 897bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (r = 0; r < 4; ++r) { 907bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (c = 0; c < 4; ++c) { 917bc9febe8749e98a3812a0dc4380ceae75c29450Johann int a = ip[c] + pred_ptr[c]; 92f71323e297a928af368937089d3ed71239786f86Andreas Huber 937bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (a < 0) a = 0; 94f71323e297a928af368937089d3ed71239786f86Andreas Huber 957bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (a > 255) a = 255; 96f71323e297a928af368937089d3ed71239786f86Andreas Huber 977bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr[c] = (unsigned char)a; 9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 997bc9febe8749e98a3812a0dc4380ceae75c29450Johann ip += 4; 1007bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_stride; 1017bc9febe8749e98a3812a0dc4380ceae75c29450Johann pred_ptr += pred_stride; 1027bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1057bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, 1067bc9febe8749e98a3812a0dc4380ceae75c29450Johann int pred_stride, unsigned char *dst_ptr, 1077bc9febe8749e98a3812a0dc4380ceae75c29450Johann int dst_stride) { 1087bc9febe8749e98a3812a0dc4380ceae75c29450Johann int a1 = ((input_dc + 4) >> 3); 1097bc9febe8749e98a3812a0dc4380ceae75c29450Johann int r, c; 11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1117bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (r = 0; r < 4; ++r) { 1127bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (c = 0; c < 4; ++c) { 1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann int a = a1 + pred_ptr[c]; 11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1157bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (a < 0) a = 0; 11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1177bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (a > 255) a = 255; 11890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1197bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr[c] = (unsigned char)a; 12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber } 1211b362b15af34006e6a11974088a46d42b903418eJohann 1227bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_ptr += dst_stride; 1237bc9febe8749e98a3812a0dc4380ceae75c29450Johann pred_ptr += pred_stride; 1247bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 1257bc9febe8749e98a3812a0dc4380ceae75c29450Johann} 1267bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1277bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) { 1287bc9febe8749e98a3812a0dc4380ceae75c29450Johann short output[16]; 1297bc9febe8749e98a3812a0dc4380ceae75c29450Johann int i; 1307bc9febe8749e98a3812a0dc4380ceae75c29450Johann int a1, b1, c1, d1; 1317bc9febe8749e98a3812a0dc4380ceae75c29450Johann int a2, b2, c2, d2; 1327bc9febe8749e98a3812a0dc4380ceae75c29450Johann short *ip = input; 1337bc9febe8749e98a3812a0dc4380ceae75c29450Johann short *op = output; 1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1357bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 4; ++i) { 1367bc9febe8749e98a3812a0dc4380ceae75c29450Johann a1 = ip[0] + ip[12]; 1377bc9febe8749e98a3812a0dc4380ceae75c29450Johann b1 = ip[4] + ip[8]; 1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = ip[4] - ip[8]; 1397bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1 = ip[0] - ip[12]; 1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1417bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[0] = a1 + b1; 1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[4] = c1 + d1; 1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[8] = a1 - b1; 1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[12] = d1 - c1; 1457bc9febe8749e98a3812a0dc4380ceae75c29450Johann ip++; 1467bc9febe8749e98a3812a0dc4380ceae75c29450Johann op++; 1477bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 1487bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann ip = output; 1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann op = output; 1517bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1527bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 4; ++i) { 1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann a1 = ip[0] + ip[3]; 1547bc9febe8749e98a3812a0dc4380ceae75c29450Johann b1 = ip[1] + ip[2]; 1557bc9febe8749e98a3812a0dc4380ceae75c29450Johann c1 = ip[1] - ip[2]; 1567bc9febe8749e98a3812a0dc4380ceae75c29450Johann d1 = ip[0] - ip[3]; 1577bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1587bc9febe8749e98a3812a0dc4380ceae75c29450Johann a2 = a1 + b1; 1597bc9febe8749e98a3812a0dc4380ceae75c29450Johann b2 = c1 + d1; 1607bc9febe8749e98a3812a0dc4380ceae75c29450Johann c2 = a1 - b1; 1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann d2 = d1 - c1; 1627bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1637bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[0] = (a2 + 3) >> 3; 1647bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[1] = (b2 + 3) >> 3; 1657bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[2] = (c2 + 3) >> 3; 1667bc9febe8749e98a3812a0dc4380ceae75c29450Johann op[3] = (d2 + 3) >> 3; 1677bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1687bc9febe8749e98a3812a0dc4380ceae75c29450Johann ip += 4; 1697bc9febe8749e98a3812a0dc4380ceae75c29450Johann op += 4; 1707bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 1717bc9febe8749e98a3812a0dc4380ceae75c29450Johann 1727bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 16; ++i) { 1737bc9febe8749e98a3812a0dc4380ceae75c29450Johann mb_dqcoeff[i * 16] = output[i]; 1747bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 17590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1777bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) { 1787bc9febe8749e98a3812a0dc4380ceae75c29450Johann int i; 1797bc9febe8749e98a3812a0dc4380ceae75c29450Johann int a1; 18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 1817bc9febe8749e98a3812a0dc4380ceae75c29450Johann a1 = ((input[0] + 3) >> 3); 1827bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (i = 0; i < 16; ++i) { 1837bc9febe8749e98a3812a0dc4380ceae75c29450Johann mb_dqcoeff[i * 16] = a1; 1847bc9febe8749e98a3812a0dc4380ceae75c29450Johann } 18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber} 186