/*
 * Copyright (C) 2010-2011 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
166553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
176553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#define JPEG_INTERNALS
186553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#include "jinclude.h"
196553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#include "jpeglib.h"
206553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#include "jdct.h"		/* Private declarations for DCT subsystem */
216553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
226553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#ifdef ANDROID_INTELSSE2_IDCT
236553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#include <emmintrin.h>
246553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
256553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#if DCTSIZE != 8
266553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
276553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#endif
286553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
/*
 * Fixed-point scaling.  SHIFT_INV_ROW is the arithmetic right shift applied
 * to the 32-bit row-pass results, SHIFT_INV_COL the one applied to the 16-bit
 * column-pass results.  The RND_* values are the matching rounding terms:
 * half of the shifted-out range, and (for the "corr" variant) one less.
 */
#define BITS_INV_ACC  4
#define SHIFT_INV_ROW 12
#define SHIFT_INV_COL 5
const short RND_INV_ROW  = 1 << (SHIFT_INV_ROW - 1);        /* 2048 */
const short RND_INV_COL  = 1 << (SHIFT_INV_COL - 1);        /* 16 */
const short RND_INV_CORR = (1 << (SHIFT_INV_COL - 1)) - 1;  /* 15 */
356553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
/*
 * 16-byte aligned constant vectors, each read as one __m128i.
 * IDCT_SPLAT8 fills all eight 16-bit lanes with the same value.
 */
#define IDCT_SPLAT8(v) { (v), (v), (v), (v), (v), (v), (v), (v) }

/* Rounding / correction terms. */
static const short __attribute__ ((aligned(16))) M128_one_corr[8] = IDCT_SPLAT8(1);
/* Added with a 32-bit add in the row pass, so laid out as (2048, 0) pairs. */
static const short __attribute__ ((aligned(16))) M128_round_inv_row[8] =
  { 2048, 0, 2048, 0, 2048, 0, 2048, 0 };
static const short __attribute__ ((aligned(16))) M128_round_inv_col[8] = IDCT_SPLAT8(16);
static const short __attribute__ ((aligned(16))) M128_round_inv_corr[8] = IDCT_SPLAT8(15);

/* Column-pass multipliers (used with _mm_mulhi_epi16). */
static const short __attribute__ ((aligned(16))) M128_tg_1_16[8] = IDCT_SPLAT8(13036);
static const short __attribute__ ((aligned(16))) M128_tg_2_16[8] = IDCT_SPLAT8(27146);
static const short __attribute__ ((aligned(16))) M128_tg_3_16[8] = IDCT_SPLAT8(-21746);
static const short __attribute__ ((aligned(16))) M128_cos_4_16[8] = IDCT_SPLAT8(-19195);

/* Level shift added to every output sample before packing to bytes. */
static const short __attribute__ ((aligned(16))) jpeg_adjust[8] = IDCT_SPLAT8(128);

#undef IDCT_SPLAT8
476553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
/*
 * Row-pass weights for rows 0 and 4.
 * Each line is one 8-lane _mm_madd_epi16 operand; iDCT_8_2ROWs pairs the
 * lines with the broadcast inputs noted on the right.
 */
static const short __attribute__ ((aligned(16))) M128_tab_i_04[32] = {
   16384,  21407,  16384,   8867,  16384,  -8867,  16384, -21407, /* x0,x2 */
   16384,   8867, -16384, -21407, -16384,  21407,  16384,  -8867, /* x4,x6 */
   22725,  19266,  19266,  -4520,  12873, -22725,   4520, -12873, /* x1,x3 */
   12873,   4520, -22725, -12873,   4520,  19266,  19266, -22725  /* x5,x7 */
};
596553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
/*
 * Row-pass weights for rows 1 and 7.
 * Each line is one 8-lane _mm_madd_epi16 operand; iDCT_8_2ROWs pairs the
 * lines with the broadcast inputs noted on the right.
 */
static const short __attribute__ ((aligned(16))) M128_tab_i_17[32] = {
   22725,  29692,  22725,  12299,  22725, -12299,  22725, -29692, /* x0,x2 */
   22725,  12299, -22725, -29692, -22725,  29692,  22725, -12299, /* x4,x6 */
   31521,  26722,  26722,  -6270,  17855, -31521,   6270, -17855, /* x1,x3 */
   17855,   6270, -31521, -17855,   6270,  26722,  26722, -31521  /* x5,x7 */
};
716553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
/*
 * Row-pass weights for rows 2 and 6.
 * Each line is one 8-lane _mm_madd_epi16 operand; iDCT_8_2ROWs pairs the
 * lines with the broadcast inputs noted on the right.
 */
static const short __attribute__ ((aligned(16))) M128_tab_i_26[32] = {
   21407,  27969,  21407,  11585,  21407, -11585,  21407, -27969, /* x0,x2 */
   21407,  11585, -21407, -27969, -21407,  27969,  21407, -11585, /* x4,x6 */
   29692,  25172,  25172,  -5906,  16819, -29692,   5906, -16819, /* x1,x3 */
   16819,   5906, -29692, -16819,   5906,  25172,  25172, -29692  /* x5,x7 */
};
836553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
/*
 * Row-pass weights for rows 3 and 5.
 * Each line is one 8-lane _mm_madd_epi16 operand; iDCT_8_2ROWs pairs the
 * lines with the broadcast inputs noted on the right.
 */
static const short __attribute__ ((aligned(16))) M128_tab_i_35[32] = {
   19266,  25172,  19266,  10426,  19266, -10426,  19266, -25172, /* x0,x2 */
   19266,  10426, -19266, -25172, -19266,  25172,  19266, -10426, /* x4,x6 */
   26722,  22654,  22654,  -5315,  15137, -26722,   5315, -15137, /* x1,x3 */
   15137,   5315, -26722, -15137,   5315,  22654,  22654, -26722  /* x5,x7 */
};
956553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
966553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
976553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao/*
986553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao * Perform dequantization and inverse DCT on one block of coefficients by SSE.
996553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao */
1006553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
1016553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhaoGLOBAL(void)
1026553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhaojpeg_idct_intelsse (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1036553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao		 JCOEFPTR coef_block,
1046553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao		 JSAMPARRAY output_buf, JDIMENSION output_col)
1056553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao{
1066553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  __m128i row0, tmp1, tmp2, tmp3, row2, tmp5, tmp6, tmp7;
1076553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  int ctr;
1086553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  JSAMPROW  outptrTemp;
1096553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1106553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  short __attribute__((aligned(16))) quantptrSSE[DCTSIZE2];
1116553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  short __attribute__((aligned(16))) workspaceSSE[DCTSIZE2];
1126553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  short __attribute__((aligned(16))) coef_blockSSE[DCTSIZE2];
1136553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  __m128i x0, x1, x2, x3, x4, x5, x6, x7;
1146553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  __m128i* tg3, *tg1, *tg2, *cos4;
1156553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  __m128i tm765, tp765, tm465, tp465, tp03, tm03, tp12, tm12, tp65, tm65;
1166553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  __m128i t0, t1, t2, t3, t4, t5, t6, t7;
1176553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  __m128i temp, temp2;
1186553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  short * wsptr;
1196553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  unsigned char * outptr;
1206553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
1216553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#define iDCT_8_2ROWs(table1, table2)   \
1226553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row0 = _mm_shufflelo_epi16(row0, 0xD8); /*x7, x6, x5, x4, x3, x1, x2, x0*/    \
1236553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row2 = _mm_shufflelo_epi16(row2, 0xD8);   \
1246553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp1 = _mm_shuffle_epi32(row0, 0);      /*x2, x0, x2, x0, x2, x0, x2, x0*/    \
1256553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp5 = _mm_shuffle_epi32(row2, 0);        \
1266553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao                                                                                  \
1276553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp3 = _mm_shuffle_epi32(row0, 0x55);   /*x3, x1, x3, x1, x3, x1, x3, x1*/    \
1286553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp7 = _mm_shuffle_epi32(row2, 0x55);     \
1296553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row0 = _mm_shufflehi_epi16(row0, 0xD8); /*x7, x5, x6, x4, x3, x1, x2, x0*/    \
1306553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row2 = _mm_shufflehi_epi16(row2, 0xD8);   \
1316553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao						\
1326553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp1 = _mm_madd_epi16(tmp1, * ( __m128i*)table1);      /*x2*w13+x0*w12, x2*w9+x0*w8, x2*w5+x0*w4, x2*w1+x0*w0*/   \
1336553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp5 = _mm_madd_epi16(tmp5, * ( __m128i*)table2);       \
1346553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao						\
1356553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp2 =  _mm_shuffle_epi32(row0, 0xAA);  /*x6, x4, x6, x4, x6, x4, x6, x4*/    \
1366553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp6 = _mm_shuffle_epi32(row2, 0xAA);     \
1376553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row0 = _mm_shuffle_epi32(row0, 0xFF);   /*x7, x5, x7, x5, x7, x5, x7, x5*/    \
1386553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row2 = _mm_shuffle_epi32(row2, 0xFF);     \
1396553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
1406553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp3 = _mm_madd_epi16(tmp3, * ( __m128i*)(table1+16)); /*x3*w29+x1*w28, x3*w25+x1*w24, x3*w21+x1*w20, x3*w17+x1*w16*/  \
1416553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp7 = _mm_madd_epi16(tmp7, * ( __m128i*)(table2+16) ); \
1426553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row0 = _mm_madd_epi16(row0, * ( __m128i*)(table1+24)); /*x7*w31+x5*w30, x7*w27+x5*w26, x7*w23+x5*w22, x7*w19+x5*w18*/  \
1436553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row2 = _mm_madd_epi16(row2, * ( __m128i*)(table2+24) ); \
1446553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp2 = _mm_madd_epi16(tmp2, * ( __m128i*)(table1+8) ); /*x6*w15+x4*w14, x6*w11+x4*w10, x6*w7+x4*w6, x6*w3+x4*w2*/  \
1456553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp6 = _mm_madd_epi16(tmp6, * ( __m128i*)(table2+8) );  \
1466553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao                                                             \
1476553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp1 = _mm_add_epi32(tmp1, * ( __m128i*)M128_round_inv_row);       \
1486553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp5 = _mm_add_epi32(tmp5, * ( __m128i*)M128_round_inv_row);      \
1496553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row0 = _mm_add_epi32(row0, tmp3);    /*b3, b2, b1, b0*/  \
1506553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row2 = _mm_add_epi32(row2, tmp7);                       \
1516553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp1 = _mm_add_epi32(tmp1, tmp2);    /*a3, a2, a1, a0*/  \
1526553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp5 = _mm_add_epi32(tmp5, tmp6);                       \
1536553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao                                                             \
1546553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp2 = tmp1;  \
1556553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp6 = tmp5;  \
1566553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp2 = _mm_sub_epi32(tmp2, row0); /*for row0. y4= a3-b3, y5=a2-b2, y6=a1-b1, y7=a0-b0 */   \
1576553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp6 = _mm_sub_epi32(tmp6, row2);  \
1586553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row0 = _mm_add_epi32(row0, tmp1); /*y3=a3+b3,y2=a2+b2,y1=a1+b1,y0=a0+b0*/   \
1596553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row2 = _mm_add_epi32(row2, tmp5);  \
1606553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp2 = _mm_srai_epi32(tmp2, SHIFT_INV_ROW);             \
1616553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp6 = _mm_srai_epi32(tmp6, SHIFT_INV_ROW);  \
1626553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row0 = _mm_srai_epi32(row0, SHIFT_INV_ROW);             \
1636553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row2 = _mm_srai_epi32(row2, SHIFT_INV_ROW);  \
1646553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp2 = _mm_shuffle_epi32(tmp2, 0x1B); /*y7, y6, y5, y4*/   \
1656553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tmp6 = _mm_shuffle_epi32(tmp6, 0x1B);  \
1666553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row0 = _mm_packs_epi32(row0, tmp2); /*row0 = y7,y6,y5,y4,y3,y2,y1,y0*/  \
1676553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    row2 = _mm_packs_epi32(row2, tmp6);  /*row2 = y7,...y0*/
1686553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
1696553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
1706553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#define iDCT_8_COL()  \
1716553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    x3 = _mm_load_si128(( __m128i*)(wsptr+24));\
1726553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    x1 = _mm_load_si128(( __m128i*)(wsptr+8));\
1736553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    x5 = row0;\
1746553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    x7 = row2;\
1756553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
1766553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tg3 = ( __m128i*)(M128_tg_3_16);\
1776553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tg1 = ( __m128i*)(M128_tg_1_16);\
1786553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tg2 = ( __m128i*)(M128_tg_2_16);\
1796553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    cos4 =(__m128i*)(M128_cos_4_16);\
1806553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
1816553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_mulhi_epi16(x5, *tg3);  /*row5*tg3*/ \
1826553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_mulhi_epi16(x3, *tg3);\
1836553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_adds_epi16(temp, x5); /*coef adjustment*/ \
1846553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_adds_epi16(temp2, x3);\
1856553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tm765 = _mm_adds_epi16(temp, x3);\
1866553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tm465 = _mm_subs_epi16(x5, temp2);\
1876553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
1886553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_mulhi_epi16(x7, *tg1);  /*row7*tg1*/ \
1896553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_mulhi_epi16(x1, *tg1);\
1906553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tp765 = _mm_adds_epi16(temp, x1);\
1916553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tp465 = _mm_subs_epi16(temp2, x7);  /*row1*tg1 - row7*/ \
1926553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
1936553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t7 = _mm_adds_epi16(tp765, tm765);\
1946553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t7 = _mm_adds_epi16(t7, *( __m128i*)M128_one_corr);\
1956553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tp65 = _mm_subs_epi16(tp765, tm765);\
1966553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t4 =  _mm_adds_epi16(tp465, tm465);\
1976553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tm65 = _mm_subs_epi16(tp465, tm465);\
1986553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tm65 = _mm_adds_epi16(tm65, *( __m128i*)M128_one_corr);\
1996553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2006553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    x0 = _mm_load_si128(( __m128i*)(wsptr));\
2016553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    x4 = _mm_load_si128(( __m128i*)(wsptr+32));\
2026553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    x2 = _mm_load_si128(( __m128i*)(wsptr+16));\
2036553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    x6 = _mm_load_si128(( __m128i*)(wsptr+48));\
2046553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2056553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    /*t6 = ( tp65 + tm65 ) * cos_4_16;*/ \
2066553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_adds_epi16(tp65, tm65);\
2076553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_subs_epi16(tp65, tm65);\
2086553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t6 = _mm_mulhi_epi16(temp, *cos4);\
2096553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t5 = _mm_mulhi_epi16(temp2, *cos4);\
2106553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t6 = _mm_adds_epi16(t6, temp);\
2116553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t6 = _mm_or_si128(t6, *( __m128i*)M128_one_corr);\
2126553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t5 = _mm_adds_epi16(t5, temp2);\
2136553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t5 = _mm_or_si128(t5, *( __m128i*)M128_one_corr);\
2146553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2156553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tp03 = _mm_adds_epi16(x0, x4);\
2166553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tp12 = _mm_subs_epi16(x0, x4);\
2176553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2186553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_mulhi_epi16(x6, *tg2);\
2196553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_mulhi_epi16(x2, *tg2);\
2206553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tm03 = _mm_adds_epi16(temp, x2);\
2216553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    tm12 = _mm_subs_epi16(temp2, x6);\
2226553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2236553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t0 = _mm_adds_epi16(tp03, tm03);\
2246553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t0 = _mm_adds_epi16(t0, *( __m128i*)M128_round_inv_col);\
2256553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t3 = _mm_subs_epi16(tp03, tm03);\
2266553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t3 = _mm_adds_epi16(t3, *( __m128i*)M128_round_inv_corr);\
2276553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t1 = _mm_adds_epi16(tp12, tm12);\
2286553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t1 = _mm_adds_epi16(t1, *( __m128i*)M128_round_inv_col);\
2296553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t2 = _mm_subs_epi16(tp12, tm12);\
2306553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    t2 = _mm_adds_epi16(t2, *( __m128i*)M128_round_inv_corr);\
2316553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2326553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_adds_epi16(t0, t7);   /*y0*/ \
2336553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_adds_epi16(t1, t6);  /*y1*/ \
2346553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_srai_epi16(temp, SHIFT_INV_COL);\
2356553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_srai_epi16(temp2, SHIFT_INV_COL);\
2366553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_adds_epi16(temp, *( __m128i*)jpeg_adjust); /*Add 128 for jpeg decoding*/ \
2376553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_adds_epi16(temp2, *( __m128i*)jpeg_adjust);\
2386553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2396553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_packus_epi16(temp, temp2);\
2406553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    _mm_store_si128(( __m128i*)(outptr), temp);  /*store y0, y1*/ \
2416553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2426553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_adds_epi16(t2, t5);\
2436553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_adds_epi16(t3, t4);\
2446553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_srai_epi16(temp, SHIFT_INV_COL);\
2456553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_srai_epi16(temp2, SHIFT_INV_COL);\
2466553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_adds_epi16(temp, *( __m128i*)jpeg_adjust);\
2476553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_adds_epi16(temp2, *( __m128i*)jpeg_adjust);\
2486553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2496553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_packus_epi16(temp, temp2);\
2506553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    _mm_store_si128(( __m128i*)(outptr+16), temp);  /*store y2, y3*/ \
2516553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2526553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_subs_epi16(t3, t4);\
2536553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_subs_epi16(t2, t5);\
2546553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_srai_epi16(temp, SHIFT_INV_COL);\
2556553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_srai_epi16(temp2, SHIFT_INV_COL);\
2566553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_adds_epi16(temp, *( __m128i*)jpeg_adjust);\
2576553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_adds_epi16(temp2, *( __m128i*)jpeg_adjust);\
2586553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2596553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_packus_epi16(temp, temp2);\
2606553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    _mm_store_si128(( __m128i*)(outptr+32), temp);  /*store y4, y5*/ \
2616553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2626553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_subs_epi16(t1, t6);\
2636553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_subs_epi16(t0, t7);\
2646553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_srai_epi16(temp, SHIFT_INV_COL);\
2656553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_srai_epi16(temp2, SHIFT_INV_COL);\
2666553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_adds_epi16(temp, *( __m128i*)jpeg_adjust);\
2676553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp2 = _mm_adds_epi16(temp2, *( __m128i*)jpeg_adjust);\
2686553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao\
2696553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    temp = _mm_packus_epi16(temp, temp2);\
2706553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    _mm_store_si128(( __m128i*)(outptr+48), temp);  /*store y6, y7*/
2716553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2726553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2736553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  /*Memcpy to do 16byte alignment. */
2746553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  memcpy((char*)quantptrSSE, (char*)compptr->dct_table, sizeof(quantptrSSE));
2756553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  memcpy((char*)coef_blockSSE, (char*)coef_block, sizeof(coef_blockSSE));
2766553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2776553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  wsptr = (short *)workspaceSSE;
2786553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  outptr = (unsigned char*)workspaceSSE;
2796553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2806553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  // row 0 and row 2
2816553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row0 = _mm_load_si128((__m128i const*)(coef_blockSSE));
2826553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row2 = _mm_load_si128((__m128i const*)(coef_blockSSE+8*2));
2836553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row0 = _mm_mullo_epi16( row0, *(__m128i const*)quantptrSSE );
2846553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row2 = _mm_mullo_epi16( row2, *(__m128i const*)(quantptrSSE+8*2) );
2856553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2866553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  iDCT_8_2ROWs(M128_tab_i_04, M128_tab_i_26);
2876553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2886553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  _mm_store_si128((__m128i*)(wsptr), row0);
2896553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  _mm_store_si128((__m128i*)(wsptr+8*2), row2);
2906553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2916553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  // row 4 and row 6
2926553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row0 = _mm_load_si128((__m128i const*)(coef_blockSSE+8*4));
2936553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row2 = _mm_load_si128((__m128i const*)(coef_blockSSE+8*6));
2946553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row0 = _mm_mullo_epi16(row0, *(__m128i const*)(quantptrSSE+8*4) );
2956553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row2 = _mm_mullo_epi16(row2, *(__m128i const*)(quantptrSSE+8*6) );
2966553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2976553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  iDCT_8_2ROWs(M128_tab_i_04, M128_tab_i_26);
2986553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
2996553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  _mm_store_si128((__m128i*)(wsptr+32), row0);
3006553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  _mm_store_si128((__m128i*)(wsptr+48), row2);
3016553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
3026553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  // row 3 and row 1
3036553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row0 = _mm_load_si128((__m128i const*)(coef_blockSSE+8*3));
3046553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row2 = _mm_load_si128((__m128i const*)(coef_blockSSE+8*1));
3056553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row0 = _mm_mullo_epi16(row0, *(__m128i const*)(quantptrSSE+24) );
3066553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row2 = _mm_mullo_epi16(row2, *(__m128i const*)(quantptrSSE+8) );
3076553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
3086553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  iDCT_8_2ROWs(M128_tab_i_35, M128_tab_i_17);
3096553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
3106553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  _mm_store_si128((__m128i*)(wsptr+24), row0);
3116553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  _mm_store_si128((__m128i*)(wsptr+8), row2);
3126553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
3136553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  // row 5 and row 7
3146553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row0 = _mm_load_si128((__m128i const*)(coef_blockSSE+8*5));
3156553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row2 = _mm_load_si128((__m128i const*)(coef_blockSSE+8*7));
3166553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row0 = _mm_mullo_epi16(row0, *(__m128i const*)(quantptrSSE+40) );
3176553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  row2 = _mm_mullo_epi16(row2, *(__m128i const*)(quantptrSSE+56));
3186553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
3196553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  iDCT_8_2ROWs( M128_tab_i_35, M128_tab_i_17);
3206553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
3216553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  iDCT_8_COL();
3226553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
3236553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  for(ctr = 0; ctr < DCTSIZE; ctr++)
3246553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  {
3256553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    outptrTemp = output_buf[ctr] + output_col;
3266553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    memcpy(outptrTemp, outptr, DCTSIZE);
3276553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao    outptr += DCTSIZE;   /* advance pointer to next row */
3286553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  }
3296553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao
3306553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao  return;
3316553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao}
3326553d244f16f1ce8e6b5e018fd1b49e98a3c1de3tengfei.zhao#endif /* ANDROID_INTELSSE2_IDCT */
333