170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine/* 270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * jidctflt.c 370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * 470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * Copyright (C) 1994-1998, Thomas G. Lane. 570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * This file is part of the Independent JPEG Group's software. 670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * For conditions of distribution and use, see the accompanying README file. 770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * 870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * This file contains a floating-point implementation of the 970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine 1070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * must also perform dequantization of the input coefficients. 1170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * 1270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * This implementation should be more accurate than either of the integer 1370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * IDCT implementations. However, it may not give the same results on all 1470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * machines because of differences in roundoff behavior. Speed will depend 1570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * on the hardware's floating point capacity. 1670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * 1770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT 1870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * on each row (or vice versa, but it's more convenient to emit a row at 1970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * a time). Direct algorithms are also available, but they are much more 2070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * complex and seem not to be any faster when reduced to code. 2170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * 2270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * This implementation is based on Arai, Agui, and Nakajima's algorithm for 2370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in 2470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * Japanese, but the algorithm is described in the Pennebaker & Mitchell 2570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * JPEG textbook (see REFERENCES section in file README). The following code 2670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * is based directly on figure 4-8 in P&M. 2770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * While an 8-point DCT cannot be done in less than 11 multiplies, it is 2870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * possible to arrange the computation so that many of the multiplies are 2970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * simple scalings of the final outputs. These multiplies can then be 3070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * folded into the multiplications or divisions by the JPEG quantization 3170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * table entries. The AA&N method leaves only 5 multiplies and 29 adds 3270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * to be done in the DCT itself. 3370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * The primary disadvantage of this method is that with a fixed-point 3470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * implementation, accuracy is lost due to imprecise representation of the 3570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * scaled quantization values. However, that problem does not arise if 3670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * we use floating point arithmetic. 3770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine */ 3870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 3970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#define JPEG_INTERNALS 4070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#include "jinclude.h" 4170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#include "jpeglib.h" 4270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#include "jdct.h" /* Private declarations for DCT subsystem */ 4370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 4470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#ifdef DCT_FLOAT_SUPPORTED 4570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 4670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 4770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine/* 4870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * This module is specialized to the case DCTSIZE = 8. 4970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine */ 5070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 5170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#if DCTSIZE != 8 5270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 5370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#endif 5470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 5570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 5670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine/* Dequantize a coefficient by multiplying it by the multiplier-table 5770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * entry; produce a float result. 5870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine */ 5970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 6070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#define DEQUANTIZE(coef,quantval) (((FAST_FLOAT) (coef)) * (quantval)) 6170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 6270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 6370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine/* 6470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * Perform dequantization and inverse DCT on one block of coefficients. 6570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine */ 6670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 6770a18cd874a22452aca9e39e22275ed4538ed20bVladimir ChtchetkineGLOBAL(void) 6870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkinejpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 6970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine JCOEFPTR coef_block, 7070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine JSAMPARRAY output_buf, JDIMENSION output_col) 7170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine{ 7270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 7370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine FAST_FLOAT tmp10, tmp11, tmp12, tmp13; 7470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine FAST_FLOAT z5, z10, z11, z12, z13; 7570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine JCOEFPTR inptr; 7670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine FLOAT_MULT_TYPE * quantptr; 7770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine FAST_FLOAT * wsptr; 7870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine JSAMPROW outptr; 7970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine JSAMPLE *range_limit = IDCT_range_limit(cinfo); 8070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine int ctr; 8170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */ 8270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine SHIFT_TEMPS 8370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 8470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Pass 1: process columns from input, store into work array. */ 8570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 8670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine inptr = coef_block; 8770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table; 8870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr = workspace; 8970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine for (ctr = DCTSIZE; ctr > 0; ctr--) { 9070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Due to quantization, we will usually find that many of the input 9170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * coefficients are zero, especially the AC terms. We can exploit this 9270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * by short-circuiting the IDCT calculation for any column in which all 9370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * the AC terms are zero. In that case each output is equal to the 9470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * DC coefficient (with scale factor as needed). 9570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * With typical images and quantization tables, half or more of the 9670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * column DCT calculations can be simplified this way. 9770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine */ 9870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 9970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 10070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 10170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 10270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine inptr[DCTSIZE*7] == 0) { 10370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* AC terms all zero */ 10470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 10570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 10670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*0] = dcval; 10770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*1] = dcval; 10870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*2] = dcval; 10970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*3] = dcval; 11070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*4] = dcval; 11170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*5] = dcval; 11270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*6] = dcval; 11370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*7] = dcval; 11470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 11570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine inptr++; /* advance pointers to next column */ 11670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine quantptr++; 11770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr++; 11870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine continue; 11970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine } 12070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 12170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Even part */ 12270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 12370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 12470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 12570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 12670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 12770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 12870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp10 = tmp0 + tmp2; /* phase 3 */ 12970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp11 = tmp0 - tmp2; 13070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 13170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp13 = tmp1 + tmp3; /* phases 5-3 */ 13270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */ 13370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 13470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp0 = tmp10 + tmp13; /* phase 2 */ 13570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp3 = tmp10 - tmp13; 13670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp1 = tmp11 + tmp12; 13770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp2 = tmp11 - tmp12; 13870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 13970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Odd part */ 14070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 14170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 14270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 14370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 14470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 14570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 14670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z13 = tmp6 + tmp5; /* phase 6 */ 14770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z10 = tmp6 - tmp5; 14870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z11 = tmp4 + tmp7; 14970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z12 = tmp4 - tmp7; 15070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 15170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp7 = z11 + z13; /* phase 5 */ 15270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */ 15370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 15470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ 15570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ 15670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ 15770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 15870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp6 = tmp12 - tmp7; /* phase 2 */ 15970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp5 = tmp11 - tmp6; 16070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp4 = tmp10 + tmp5; 16170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 16270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*0] = tmp0 + tmp7; 16370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*7] = tmp0 - tmp7; 16470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*1] = tmp1 + tmp6; 16570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*6] = tmp1 - tmp6; 16670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*2] = tmp2 + tmp5; 16770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*5] = tmp2 - tmp5; 16870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*4] = tmp3 + tmp4; 16970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr[DCTSIZE*3] = tmp3 - tmp4; 17070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 17170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine inptr++; /* advance pointers to next column */ 17270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine quantptr++; 17370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr++; 17470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine } 17570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 17670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Pass 2: process rows from work array, store into output array. */ 17770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Note that we must descale the results by a factor of 8 == 2**3. */ 17870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 17970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr = workspace; 18070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine for (ctr = 0; ctr < DCTSIZE; ctr++) { 18170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr = output_buf[ctr] + output_col; 18270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Rows of zeroes can be exploited in the same way as we did with columns. 18370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * However, the column calculation has created many nonzero AC terms, so 18470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * the simplification applies less often (typically 5% to 10% of the time). 18570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine * And testing floats for zero is relatively expensive, so we don't bother. 18670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine */ 18770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 18870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Even part */ 18970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 19070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp10 = wsptr[0] + wsptr[4]; 19170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp11 = wsptr[0] - wsptr[4]; 19270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 19370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp13 = wsptr[2] + wsptr[6]; 19470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; 19570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 19670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp0 = tmp10 + tmp13; 19770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp3 = tmp10 - tmp13; 19870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp1 = tmp11 + tmp12; 19970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp2 = tmp11 - tmp12; 20070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 20170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Odd part */ 20270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 20370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z13 = wsptr[5] + wsptr[3]; 20470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z10 = wsptr[5] - wsptr[3]; 20570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z11 = wsptr[1] + wsptr[7]; 20670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z12 = wsptr[1] - wsptr[7]; 20770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 20870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp7 = z11 + z13; 20970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); 21070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 21170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ 21270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ 21370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ 21470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 21570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp6 = tmp12 - tmp7; 21670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp5 = tmp11 - tmp6; 21770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine tmp4 = tmp10 + tmp5; 21870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 21970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine /* Final output stage: scale down by a factor of 8 and range-limit */ 22070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 22170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3) 22270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine & RANGE_MASK]; 22370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3) 22470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine & RANGE_MASK]; 22570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3) 22670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine & RANGE_MASK]; 22770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3) 22870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine & RANGE_MASK]; 22970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3) 23070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine & RANGE_MASK]; 23170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3) 23270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine & RANGE_MASK]; 23370a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3) 23470a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine & RANGE_MASK]; 23570a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3) 23670a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine & RANGE_MASK]; 23770a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 23870a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine wsptr += DCTSIZE; /* advance pointer to next row */ 23970a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine } 24070a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine} 24170a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine 24270a18cd874a22452aca9e39e22275ed4538ed20bVladimir Chtchetkine#endif /* DCT_FLOAT_SUPPORTED */ 243