1f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org/* 2f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * jidctflt.c 3f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * 4f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * Copyright (C) 1994-1998, Thomas G. Lane. 5f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This file is part of the Independent JPEG Group's software. 6f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * For conditions of distribution and use, see the accompanying README file. 7f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * 8f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This file contains a floating-point implementation of the 9f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine 10f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * must also perform dequantization of the input coefficients. 11f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * 12f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This implementation should be more accurate than either of the integer 13f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * IDCT implementations. However, it may not give the same results on all 14f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * machines because of differences in roundoff behavior. Speed will depend 15f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * on the hardware's floating point capacity. 16f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * 17f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT 18f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * on each row (or vice versa, but it's more convenient to emit a row at 19f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * a time). Direct algorithms are also available, but they are much more 20f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * complex and seem not to be any faster when reduced to code. 21f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * 22f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This implementation is based on Arai, Agui, and Nakajima's algorithm for 23f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in 24f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * Japanese, but the algorithm is described in the Pennebaker & Mitchell 25f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * JPEG textbook (see REFERENCES section in file README). The following code 26f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * is based directly on figure 4-8 in P&M. 27f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * While an 8-point DCT cannot be done in less than 11 multiplies, it is 28f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * possible to arrange the computation so that many of the multiplies are 29f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * simple scalings of the final outputs. These multiplies can then be 30f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * folded into the multiplications or divisions by the JPEG quantization 31f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * table entries. The AA&N method leaves only 5 multiplies and 29 adds 32f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * to be done in the DCT itself. 33f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * The primary disadvantage of this method is that with a fixed-point 34f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * implementation, accuracy is lost due to imprecise representation of the 35f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * scaled quantization values. However, that problem does not arise if 36f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * we use floating point arithmetic. 37f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */ 38f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 39f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#define JPEG_INTERNALS 40f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#include "jinclude.h" 41f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#include "jpeglib.h" 42f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#include "jdct.h" /* Private declarations for DCT subsystem */ 43f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 44f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#ifdef DCT_FLOAT_SUPPORTED 45f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 46f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 47f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org/* 48f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This module is specialized to the case DCTSIZE = 8. 49f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */ 50f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 51f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#if DCTSIZE != 8 52f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 53f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#endif 54f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 55f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 56f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org/* Dequantize a coefficient by multiplying it by the multiplier-table 57f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * entry; produce a float result. 58f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */ 59f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 60f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#define DEQUANTIZE(coef,quantval) (((FAST_FLOAT) (coef)) * (quantval)) 61f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 62f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 63f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org/* 64f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients. 65f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */ 66f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 67f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.orgGLOBAL(void) 68f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.orgjpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 69f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org JCOEFPTR coef_block, 70f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org JSAMPARRAY output_buf, JDIMENSION output_col) 71f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org{ 72f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 73f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org FAST_FLOAT tmp10, tmp11, tmp12, tmp13; 74f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org FAST_FLOAT z5, z10, z11, z12, z13; 75f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org JCOEFPTR inptr; 76f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org FLOAT_MULT_TYPE * quantptr; 77f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org FAST_FLOAT * wsptr; 78f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org JSAMPROW outptr; 79f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org JSAMPLE *range_limit = IDCT_range_limit(cinfo); 80f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org int ctr; 81f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */ 82f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org SHIFT_TEMPS 83f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 84f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Pass 1: process columns from input, store into work array. */ 85f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 86f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org inptr = coef_block; 87f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table; 88f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr = workspace; 89f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org for (ctr = DCTSIZE; ctr > 0; ctr--) { 90f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Due to quantization, we will usually find that many of the input 91f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * coefficients are zero, especially the AC terms. We can exploit this 92f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * by short-circuiting the IDCT calculation for any column in which all 93f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * the AC terms are zero. In that case each output is equal to the 94f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * DC coefficient (with scale factor as needed). 95f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * With typical images and quantization tables, half or more of the 96f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * column DCT calculations can be simplified this way. 97f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */ 98f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 99f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 100f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 101f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 102f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org inptr[DCTSIZE*7] == 0) { 103f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* AC terms all zero */ 104f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 105f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 106f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*0] = dcval; 107f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*1] = dcval; 108f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*2] = dcval; 109f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*3] = dcval; 110f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*4] = dcval; 111f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*5] = dcval; 112f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*6] = dcval; 113f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*7] = dcval; 114f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 115f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org inptr++; /* advance pointers to next column */ 116f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org quantptr++; 117f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr++; 118f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org continue; 119f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org } 120f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 121f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Even part */ 122f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 123f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 124f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 125f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 126f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 127f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 128f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp10 = tmp0 + tmp2; /* phase 3 */ 129f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp11 = tmp0 - tmp2; 130f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 131f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp13 = tmp1 + tmp3; /* phases 5-3 */ 132f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */ 133f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 134f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp0 = tmp10 + tmp13; /* phase 2 */ 135f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp3 = tmp10 - tmp13; 136f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp1 = tmp11 + tmp12; 137f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp2 = tmp11 - tmp12; 138f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 139f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Odd part */ 140f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 141f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 142f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 143f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 144f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 145f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 146f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z13 = tmp6 + tmp5; /* phase 6 */ 147f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z10 = tmp6 - tmp5; 148f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z11 = tmp4 + tmp7; 149f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z12 = tmp4 - tmp7; 150f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 151f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp7 = z11 + z13; /* phase 5 */ 152f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */ 153f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 154f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ 155f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ 156f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ 157f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 158f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp6 = tmp12 - tmp7; /* phase 2 */ 159f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp5 = tmp11 - tmp6; 160f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp4 = tmp10 + tmp5; 161f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 162f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*0] = tmp0 + tmp7; 163f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*7] = tmp0 - tmp7; 164f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*1] = tmp1 + tmp6; 165f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*6] = tmp1 - tmp6; 166f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*2] = tmp2 + tmp5; 167f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*5] = tmp2 - tmp5; 168f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*4] = tmp3 + tmp4; 169f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr[DCTSIZE*3] = tmp3 - tmp4; 170f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 171f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org inptr++; /* advance pointers to next column */ 172f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org quantptr++; 173f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr++; 174f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org } 175f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 176f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Pass 2: process rows from work array, store into output array. */ 177f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Note that we must descale the results by a factor of 8 == 2**3. */ 178f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 179f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr = workspace; 180f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org for (ctr = 0; ctr < DCTSIZE; ctr++) { 181f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr = output_buf[ctr] + output_col; 182f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Rows of zeroes can be exploited in the same way as we did with columns. 183f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * However, the column calculation has created many nonzero AC terms, so 184f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * the simplification applies less often (typically 5% to 10% of the time). 185f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * And testing floats for zero is relatively expensive, so we don't bother. 186f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */ 187f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 188f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Even part */ 189f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 190f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp10 = wsptr[0] + wsptr[4]; 191f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp11 = wsptr[0] - wsptr[4]; 192f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 193f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp13 = wsptr[2] + wsptr[6]; 194f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; 195f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 196f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp0 = tmp10 + tmp13; 197f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp3 = tmp10 - tmp13; 198f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp1 = tmp11 + tmp12; 199f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp2 = tmp11 - tmp12; 200f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 201f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Odd part */ 202f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 203f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z13 = wsptr[5] + wsptr[3]; 204f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z10 = wsptr[5] - wsptr[3]; 205f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z11 = wsptr[1] + wsptr[7]; 206f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z12 = wsptr[1] - wsptr[7]; 207f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 208f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp7 = z11 + z13; 209f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); 210f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 211f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ 212f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ 213f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ 214f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 215f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp6 = tmp12 - tmp7; 216f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp5 = tmp11 - tmp6; 217f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org tmp4 = tmp10 + tmp5; 218f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 219f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org /* Final output stage: scale down by a factor of 8 and range-limit */ 220f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 221f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3) 222f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org & RANGE_MASK]; 223f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3) 224f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org & RANGE_MASK]; 225f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3) 226f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org & RANGE_MASK]; 227f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3) 228f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org & RANGE_MASK]; 229f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3) 230f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org & RANGE_MASK]; 231f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3) 232f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org & RANGE_MASK]; 233f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3) 234f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org & RANGE_MASK]; 235f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3) 236f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org & RANGE_MASK]; 237f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 238f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org wsptr += DCTSIZE; /* advance pointer to next row */ 239f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org } 240f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org} 241f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org 242f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#endif /* DCT_FLOAT_SUPPORTED */ 243