/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"

/****************************************************************************
 * Notes:
 *
 * This implementation makes use of a 16 bit fixed point version of two
 * multiply constants:
 *         1.   sqrt(2) * cos (pi/8)
 *         2.   sqrt(2) * sin (pi/8)
 * Because the first constant is bigger than 1, to maintain the same 16 bit
 * fixed point precision as the second one, we use a trick of
 *         x * a = x + x*(a-1)
 * so
 *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
 **************************************************************************/
/* (sqrt(2) * cos(pi/8) - 1) scaled by 2^16 (16 bit fixed point). */
static const int cospi8sqrt2minus1 = 20091;
/* sqrt(2) * sin(pi/8) scaled by 2^16 (16 bit fixed point). */
static const int sinpi8sqrt2 = 35468;

/*
 * 4x4 inverse DCT followed by reconstruction.
 *
 * input       16 coefficients, read as a row-major 4x4 block (not modified).
 * pred_ptr    top-left of the 4x4 prediction block.
 * pred_stride distance, in bytes, between consecutive prediction rows.
 * dst_ptr     top-left of the 4x4 destination block.
 * dst_stride  distance, in bytes, between consecutive destination rows.
 *
 * The transform runs as two 1-D passes over a local 16-entry buffer: the
 * first pass works down each column, the second pass works along each row
 * and applies the final rounding ((x + 4) >> 3). The residual is then added
 * to the prediction and clamped to [0, 255] before being stored.
 */
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride) {
  const int shortpitch = 4; /* elements per row of the 4x4 block */
  short output[16];
  short *ip = input;
  short *op = output;
  int a1, b1, c1, d1;
  int temp1, temp2;
  int i;
  int r, c;

  /* Pass 1: one column per iteration (elements 0, 4, 8 and 12 from ip). */
  for (i = 0; i < 4; ++i) {
    a1 = ip[0] + ip[8];
    b1 = ip[0] - ip[8];

    /* x * sqrt(2)*cos(pi/8) is evaluated as x + x * (sqrt(2)*cos(pi/8) - 1)
     * so that both multipliers fit the same 16 bit fixed point scale. */
    temp1 = (ip[4] * sinpi8sqrt2) >> 16;
    temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
    c1 = temp1 - temp2;

    temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
    temp2 = (ip[12] * sinpi8sqrt2) >> 16;
    d1 = temp1 + temp2;

    op[shortpitch * 0] = a1 + d1;
    op[shortpitch * 3] = a1 - d1;

    op[shortpitch * 1] = b1 + c1;
    op[shortpitch * 2] = b1 - c1;

    ip++;
    op++;
  }

  /* Pass 2: one row per iteration, done in place on the intermediate
   * buffer. All four inputs of a row are consumed before any of its
   * outputs is written, so reading and writing the same row is safe. */
  ip = output;
  op = output;

  for (i = 0; i < 4; ++i) {
    a1 = ip[0] + ip[2];
    b1 = ip[0] - ip[2];

    temp1 = (ip[1] * sinpi8sqrt2) >> 16;
    temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
    c1 = temp1 - temp2;

    temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
    temp2 = (ip[3] * sinpi8sqrt2) >> 16;
    d1 = temp1 + temp2;

    /* Round to nearest and drop the three scaling bits. */
    op[0] = (a1 + d1 + 4) >> 3;
    op[3] = (a1 - d1 + 4) >> 3;

    op[1] = (b1 + c1 + 4) >> 3;
    op[2] = (b1 - c1 + 4) >> 3;

    ip += shortpitch;
    op += shortpitch;
  }

  /* Reconstruction: prediction + residual, saturated to 8 bits. */
  ip = output;
  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int a = ip[c] + pred_ptr[c];

      if (a < 0) a = 0;

      if (a > 255) a = 255;

      dst_ptr[c] = (unsigned char)a;
    }
    ip += 4;
    dst_ptr += dst_stride;
    pred_ptr += pred_stride;
  }
}

/*
 * Reconstruction for a block whose only coefficient is the DC term.
 *
 * input_dc    the DC coefficient.
 * pred_ptr    top-left of the 4x4 prediction block.
 * pred_stride distance, in bytes, between consecutive prediction rows.
 * dst_ptr     top-left of the 4x4 destination block.
 * dst_stride  distance, in bytes, between consecutive destination rows.
 *
 * The rounded value (input_dc + 4) >> 3 is added to each of the 16
 * prediction pixels; every sum is clamped to [0, 255] and stored in the
 * destination block.
 */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride) {
  /* The same residual applies to all 16 pixels, so compute it once. */
  const int a1 = ((input_dc + 4) >> 3);
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int a = a1 + pred_ptr[c];

      if (a < 0) a = 0;

      if (a > 255) a = 255;

      dst_ptr[c] = (unsigned char)a;
    }

    dst_ptr += dst_stride;
    pred_ptr += pred_stride;
  }
}

/*
 * 4x4 inverse Walsh-Hadamard style transform.
 *
 * input      16 coefficients, read as a row-major 4x4 block (not modified).
 * mb_dqcoeff destination; result i is written to mb_dqcoeff[i * 16], so the
 *            buffer must hold at least 241 shorts. Entries in between are
 *            left untouched.
 *            NOTE(review): presumably each slot is the DC position of one
 *            16-coefficient sub-block -- confirm against the caller.
 *
 * Two add/subtract butterfly passes are applied (columns first, then rows);
 * the second pass rounds each result with (x + 3) >> 3.
 */
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) {
  short output[16];
  int i;
  int a1, b1, c1, d1;
  int a2, b2, c2, d2;
  short *ip = input;
  short *op = output;

  /* Pass 1: one column per iteration (elements 0, 4, 8 and 12 from ip). */
  for (i = 0; i < 4; ++i) {
    a1 = ip[0] + ip[12];
    b1 = ip[4] + ip[8];
    c1 = ip[4] - ip[8];
    d1 = ip[0] - ip[12];

    op[0] = a1 + b1;
    op[4] = c1 + d1;
    op[8] = a1 - b1;
    op[12] = d1 - c1;
    ip++;
    op++;
  }

  /* Pass 2: one row per iteration, in place on the intermediate buffer.
   * Each row is fully read into locals before it is overwritten. */
  ip = output;
  op = output;

  for (i = 0; i < 4; ++i) {
    a1 = ip[0] + ip[3];
    b1 = ip[1] + ip[2];
    c1 = ip[1] - ip[2];
    d1 = ip[0] - ip[3];

    a2 = a1 + b1;
    b2 = c1 + d1;
    c2 = a1 - b1;
    d2 = d1 - c1;

    op[0] = (a2 + 3) >> 3;
    op[1] = (b2 + 3) >> 3;
    op[2] = (c2 + 3) >> 3;
    op[3] = (d2 + 3) >> 3;

    ip += 4;
    op += 4;
  }

  /* Scatter the 16 results at a stride of 16 shorts. */
  for (i = 0; i < 16; ++i) {
    mb_dqcoeff[i * 16] = output[i];
  }
}

/*
 * Shortcut form of the 4x4 inverse Walsh-Hadamard style transform that uses
 * only the first coefficient.
 *
 * input      only input[0] is read.
 * mb_dqcoeff destination; the rounded value (input[0] + 3) >> 3 is written
 *            to mb_dqcoeff[i * 16] for i = 0..15, so the buffer must hold at
 *            least 241 shorts. Entries in between are left untouched.
 */
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) {
  int i;
  int a1;

  a1 = ((input[0] + 3) >> 3);
  for (i = 0; i < 16; ++i) {
    mb_dqcoeff[i * 16] = a1;
  }
}