1f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org/*
2f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * jidctint.c
3f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org *
4f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * Copyright (C) 1991-1998, Thomas G. Lane.
5df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Modification developed 2002-2009 by Guido Vollbeding.
6f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This file is part of the Independent JPEG Group's software.
7f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * For conditions of distribution and use, see the accompanying README file.
8f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org *
9f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This file contains a slow-but-accurate integer implementation of the
10f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
11f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * must also perform dequantization of the input coefficients.
12f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org *
13f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * on each row (or vice versa, but it's more convenient to emit a row at
15f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * a time).  Direct algorithms are also available, but they are much more
16f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * complex and seem not to be any faster when reduced to code.
17f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org *
18f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This implementation is based on an algorithm described in
19f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * The primary algorithm described there uses 11 multiplies and 29 adds.
23f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * We use their alternate method with 12 multiplies and 32 adds.
24f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * The advantage of this method is that no data path contains more than one
25f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * multiplication; this allows a very simple and accurate implementation in
26f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * scaled fixed-point arithmetic, with a minimal number of shifts.
27df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
28df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * We also provide IDCT routines with various output sample block sizes for
29df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * direct resolution reduction or enlargement without additional resampling:
30df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * NxN (N=1...16) pixels for one 8x8 input DCT block.
31df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
32df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * For N<8 we simply take the corresponding low-frequency coefficients of
33df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
34df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * to yield the downscaled outputs.
35df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * This can be seen as direct low-pass downsampling from the DCT domain
36df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * point of view rather than the usual spatial domain point of view,
37df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * yielding significant computational savings and results at least
38df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * as good as common bilinear (averaging) spatial downsampling.
39df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
 * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients as
 * the lower frequencies and assuming the higher frequencies to be zero.
42df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * It turns out that the computational effort is similar to the 8x8 IDCT
43df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * regarding the output size.
44df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Furthermore, the scaling and descaling is the same for all IDCT sizes.
45df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
46df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
47df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * since there would be too many additional constants to pre-calculate.
48f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */
49f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
50f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#define JPEG_INTERNALS
51f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#include "jinclude.h"
52f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#include "jpeglib.h"
53f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#include "jdct.h"		/* Private declarations for DCT subsystem */
54f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
55f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#ifdef DCT_ISLOW_SUPPORTED
56f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
57f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
58f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org/*
59f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * This module is specialized to the case DCTSIZE = 8.
60f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */
61f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
62f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#if DCTSIZE != 8
63df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
64f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#endif
65f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
66f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
/*
 * The details of the scaling scheme are as follows:
 *
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
 * larger than the true IDCT outputs.  The final outputs are therefore
 * a factor of N larger than desired; since N=8 this can be cured by
 * a simple right shift at the end of the algorithm.  The advantage of
 * this arrangement is that we save two multiplications per 1-D IDCT,
 * because the y0 and y4 inputs need not be divided by sqrt(N).
 *
 * We have to do addition and subtraction of the integer inputs, which
 * is no problem, and multiplication by fractional constants, which is
 * a problem to do in integer arithmetic.  We multiply all the constants
 * by CONST_SCALE and convert them to integer constants (thus retaining
 * CONST_BITS bits of precision in the constants).  After doing a
 * multiplication we have to divide the product by CONST_SCALE, with proper
 * rounding, to produce the correct output.  This division can be done
 * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
 * as long as possible so that partial sums can be added together with
 * full fractional precision.
 *
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
 * they are represented to better-than-integral precision.  These outputs
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
 * with the recommended scaling.  (To scale up 12-bit sample data further, an
 * intermediate INT32 array would be needed.)
 *
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
 * shows that the values given below are the most effective.
 *
 * With the values below the sum is 8 + 13 + 2 = 23 for 8-bit samples;
 * for any other sample size PASS1_BITS drops to 1 (12 + 13 + 1 = 26 for
 * 12-bit samples, exactly at the limit).
 */

#if BITS_IN_JSAMPLE == 8
#define CONST_BITS  13
#define PASS1_BITS  2
#else
#define CONST_BITS  13
#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
#endif
106f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 *
 * Each pre-calculated entry is the named constant multiplied by
 * 2**CONST_BITS (8192 for CONST_BITS == 13) and rounded to the nearest
 * integer, e.g. 0.541196100 * 8192 = 4433.48 -> 4433.
 */

#if CONST_BITS == 13
#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
#else
/* No table for this CONST_BITS: let the compiler evaluate FIX() itself. */
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
141f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
142f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * The second parameter is named "constant" rather than "const" so that
 * the macro definition does not reuse a C keyword as an identifier.
 */

#if BITS_IN_JSAMPLE == 8
#define MULTIPLY(var,constant)  MULTIPLY16C16(var,constant)
#else
#define MULTIPLY(var,constant)  ((var) * (constant))
#endif
155f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
156f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
/* Dequantize a coefficient by multiplying it by the multiplier-table
 * entry; produce an int result.  In this module, both inputs and result
 * are 16 bits or less, so either int or short multiply will work.
 *
 * The coefficient is cast to ISLOW_MULT_TYPE before the multiplication;
 * both arguments are parenthesized, so expressions may be passed safely.
 */

#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
163f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
164f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
165f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org/*
166f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients.
167f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org */
168f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
169f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.orgGLOBAL(void)
170f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.orgjpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
171f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org		 JCOEFPTR coef_block,
172f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org		 JSAMPARRAY output_buf, JDIMENSION output_col)
173f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org{
174f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  INT32 tmp0, tmp1, tmp2, tmp3;
175f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  INT32 tmp10, tmp11, tmp12, tmp13;
176f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  INT32 z1, z2, z3, z4, z5;
177f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  JCOEFPTR inptr;
178f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
179f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  int * wsptr;
180f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  JSAMPROW outptr;
181f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
182f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  int ctr;
183f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  int workspace[DCTSIZE2];	/* buffers data between passes */
184f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  SHIFT_TEMPS
185f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
186f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
187f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
188f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  /* furthermore, we scale the results by 2**PASS1_BITS. */
189f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
190f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  inptr = coef_block;
191f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
192f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  wsptr = workspace;
193f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  for (ctr = DCTSIZE; ctr > 0; ctr--) {
194f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* Due to quantization, we will usually find that many of the input
195f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * coefficients are zero, especially the AC terms.  We can exploit this
196f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * by short-circuiting the IDCT calculation for any column in which all
197f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * the AC terms are zero.  In that case each output is equal to the
198f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * DC coefficient (with scale factor as needed).
199f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * With typical images and quantization tables, half or more of the
200f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * column DCT calculations can be simplified this way.
201f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     */
202f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
203f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
204f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
205f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
206f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org	inptr[DCTSIZE*7] == 0) {
207f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      /* AC terms all zero */
208f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
209f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
210f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr[DCTSIZE*0] = dcval;
211f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr[DCTSIZE*1] = dcval;
212f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr[DCTSIZE*2] = dcval;
213f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr[DCTSIZE*3] = dcval;
214f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr[DCTSIZE*4] = dcval;
215f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr[DCTSIZE*5] = dcval;
216f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr[DCTSIZE*6] = dcval;
217f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr[DCTSIZE*7] = dcval;
218f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
219f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      inptr++;			/* advance pointers to next column */
220f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      quantptr++;
221f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr++;
222f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      continue;
223f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    }
224f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
225f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* Even part: reverse the even part of the forward DCT. */
226f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* The rotator is sqrt(2)*c(-6). */
227f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
228f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
229f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
230f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
231f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
232f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
233f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
234f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
235f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
236f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
237f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
238f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp0 = (z2 + z3) << CONST_BITS;
239f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp1 = (z2 - z3) << CONST_BITS;
240f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
241f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp10 = tmp0 + tmp3;
242f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp13 = tmp0 - tmp3;
243f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp11 = tmp1 + tmp2;
244f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp12 = tmp1 - tmp2;
245f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
246f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* Odd part per figure 8; the matrix is unitary and hence its
247f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
248f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     */
249f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
250f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
251f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
252f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
253f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
254f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
255f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z1 = tmp0 + tmp3;
256f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z2 = tmp1 + tmp2;
257f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 = tmp0 + tmp2;
258f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z4 = tmp1 + tmp3;
259f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
260f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
261f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
262f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
263f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
264f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
265f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
266f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
267f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
268f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
269f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
270f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 += z5;
271f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z4 += z5;
272f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
273f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp0 += z1 + z3;
274f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp1 += z2 + z4;
275f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp2 += z2 + z3;
276f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp3 += z1 + z4;
277f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
278f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
279f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
280f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
281f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
282f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
283f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
284f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
285f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
286f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
287f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
288f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
289f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    inptr++;			/* advance pointers to next column */
290f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    quantptr++;
291f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr++;
292f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  }
293f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
294f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  /* Pass 2: process rows from work array, store into output array. */
295f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  /* Note that we must descale the results by a factor of 8 == 2**3, */
296f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  /* and also undo the PASS1_BITS scaling. */
297f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
298f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  wsptr = workspace;
299f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  for (ctr = 0; ctr < DCTSIZE; ctr++) {
300f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr = output_buf[ctr] + output_col;
301f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* Rows of zeroes can be exploited in the same way as we did with columns.
302f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * However, the column calculation has created many nonzero AC terms, so
303f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * the simplification applies less often (typically 5% to 10% of the time).
304f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * On machines with very fast multiplication, it's possible that the
305f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * test takes more time than it's worth.  In that case this section
306f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * may be commented out.
307f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     */
308f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
309f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#ifndef NO_ZERO_ROW_TEST
310f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
311f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
312f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      /* AC terms all zero */
313f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
314f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org				  & RANGE_MASK];
315f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
316f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      outptr[0] = dcval;
317f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      outptr[1] = dcval;
318f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      outptr[2] = dcval;
319f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      outptr[3] = dcval;
320f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      outptr[4] = dcval;
321f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      outptr[5] = dcval;
322f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      outptr[6] = dcval;
323f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      outptr[7] = dcval;
324f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
325f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      wsptr += DCTSIZE;		/* advance pointer to next row */
326f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org      continue;
327f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    }
328f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#endif
329f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
330f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* Even part: reverse the even part of the forward DCT. */
331f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* The rotator is sqrt(2)*c(-6). */
332f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
333f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z2 = (INT32) wsptr[2];
334f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 = (INT32) wsptr[6];
335f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
336f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
337f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
338f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
339f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
340f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
341f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
342f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
343f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp10 = tmp0 + tmp3;
344f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp13 = tmp0 - tmp3;
345f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp11 = tmp1 + tmp2;
346f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp12 = tmp1 - tmp2;
347f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
348f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* Odd part per figure 8; the matrix is unitary and hence its
349f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
350f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org     */
351f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
352f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp0 = (INT32) wsptr[7];
353f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp1 = (INT32) wsptr[5];
354f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp2 = (INT32) wsptr[3];
355f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp3 = (INT32) wsptr[1];
356f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
357f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z1 = tmp0 + tmp3;
358f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z2 = tmp1 + tmp2;
359f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 = tmp0 + tmp2;
360f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z4 = tmp1 + tmp3;
361f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
362f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
363f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
364f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
365f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
366f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
367f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
368f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
369f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
370f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
371f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
372f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z3 += z5;
373f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    z4 += z5;
374f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
375f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp0 += z1 + z3;
376f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp1 += z2 + z4;
377f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp2 += z2 + z3;
378f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    tmp3 += z1 + z4;
379f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
380f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
381f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
382f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
383f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org					  CONST_BITS+PASS1_BITS+3)
384f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org			    & RANGE_MASK];
385f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
386f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org					  CONST_BITS+PASS1_BITS+3)
387f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org			    & RANGE_MASK];
388f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
389f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org					  CONST_BITS+PASS1_BITS+3)
390f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org			    & RANGE_MASK];
391f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
392f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org					  CONST_BITS+PASS1_BITS+3)
393f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org			    & RANGE_MASK];
394f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
395f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org					  CONST_BITS+PASS1_BITS+3)
396f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org			    & RANGE_MASK];
397f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
398f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org					  CONST_BITS+PASS1_BITS+3)
399f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org			    & RANGE_MASK];
400f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
401f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org					  CONST_BITS+PASS1_BITS+3)
402f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org			    & RANGE_MASK];
403f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
404f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org					  CONST_BITS+PASS1_BITS+3)
405f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org			    & RANGE_MASK];
406f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
407f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org    wsptr += DCTSIZE;		/* advance pointer to next row */
408f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org  }
409f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org}
410f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org
411df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org#ifdef IDCT_SCALING_SUPPORTED
412df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
413df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
414df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
415df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
416df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 7x7 output block.
417df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
418df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 12 multiplications in the 1-D kernel.
419df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/14).
 *
 * Arguments, as used by this routine:
 *   cinfo      - passed to IDCT_range_limit() to obtain the lookup table
 *                whose entries become the output samples.
 *   compptr    - compptr->dct_table is read as an array of ISLOW_MULT_TYPE
 *                multipliers, indexed the same way as coef_block.
 *   coef_block - input coefficients, addressed with a row stride of
 *                DCTSIZE; only rows 0..6 of columns 0..6 are read.
 *   output_buf, output_col - seven samples are stored into each of the
 *                rows output_buf[0..6], starting at offset output_col.
 *
 * The 2-D transform is done as two 1-D passes over a 7x7 int workspace.
 * Pass 1 transforms columns and shifts its results right by only
 * CONST_BITS-PASS1_BITS, so the workspace keeps PASS1_BITS extra bits of
 * precision.  Pass 2 transforms workspace rows and shifts right by
 * CONST_BITS+PASS1_BITS+3 before the range-limit lookup.
420df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
421df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
422df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
423df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
424df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JCOEFPTR coef_block,
425df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JSAMPARRAY output_buf, JDIMENSION output_col)
426df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
427df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
428df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3;
429df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
430df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
431df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
432df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
433df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
434df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
435df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[7*7];	/* buffers data between passes */
436df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
  /* NOTE(review): SHIFT_TEMPS is a macro defined outside this function;
   * presumably it declares any scratch variable RIGHT_SHIFT needs --
   * confirm at its definition.
   */
437df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
438df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
  /* Each iteration handles one column: inptr, quantptr and wsptr all step
   * to the next column.  Input row k of the column is inptr[DCTSIZE*k];
   * the result for row k is stored at wsptr[7*k], so the workspace is laid
   * out as 7 rows of 7 ints.
   */
439df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
440df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
441df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
442df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
443df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
444df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
    /* Inputs 0, 2, 4, 6 of the column produce tmp10..tmp13.  tmp13 starts
     * as the dequantized row-0 term shifted up by CONST_BITS.
     */
445df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
446df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
447df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 <<= CONST_BITS;
448df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
    /* The constant is half of 2^(CONST_BITS-PASS1_BITS), the divisor of the
     * shift in the output stage, so that shift rounds instead of truncating
     * (assuming RIGHT_SHIFT is a plain arithmetic shift; it is defined
     * elsewhere).  tmp13 enters each of tmp10..tmp13 exactly once below, so
     * adding it here covers every output of this column.
     */
449df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
450df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
451df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
452df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
453df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
454df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
455df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
456df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
457df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
458df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = z1 + z3;
    /* z2 becomes z2 - (z1 + z3); from here on it is used only for the
     * centre output term (tmp13) below.
     */
459df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 -= tmp0;
460df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
461df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
462df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
463df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
464df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
465df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
    /* Inputs 1, 3, 5 of the column produce tmp0, tmp1, tmp2.  z1..z3 and
     * tmp0..tmp2 are reused here; their even-part values are no longer
     * needed.
     */
466df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
467df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
468df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
469df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
470df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
471df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
472df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
473df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = tmp1 - tmp2;
474df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 += tmp2;
    /* tmp2 is overwritten next; its previous value has already been folded
     * into tmp0 and tmp1 above.
     */
475df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
476df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 += tmp2;
477df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
478df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 += z2;
479df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
480df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
481df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
    /* Rows k and 6-k are the sum and difference of an even-part and an
     * odd-part term; the centre row (3) takes tmp13 alone.
     */
482df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
483df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
484df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
485df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
486df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
487df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
488df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
489df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
490df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
491df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
492df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 7 rows from work array, store into output array. */
  /* Same 1-D kernel as pass 1, but the inputs are the seven consecutive
   * ints of one workspace row (no dequantization) and the results go to
   * output row ctr at columns output_col .. output_col+6.
   */
493df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
494df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
495df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 7; ctr++) {
496df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
497df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
498df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
499df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
500df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
    /* The constant is added before the shift by CONST_BITS, so it ends up
     * as ONE << (CONST_BITS+PASS1_BITS+2): half of the divisor of the final
     * RIGHT_SHIFT by CONST_BITS+PASS1_BITS+3.
     */
501df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
502df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 <<= CONST_BITS;
503df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
504df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[2];
505df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[4];
506df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[6];
507df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
508df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
509df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
510df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
511df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = z1 + z3;
512df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 -= tmp0;
513df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
514df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
515df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
516df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
517df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
518df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
519df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
520df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
521df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
522df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
523df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
524df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
525df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
526df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = tmp1 - tmp2;
527df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 += tmp2;
528df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
529df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 += tmp2;
530df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
531df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 += z2;
532df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
533df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
534df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
    /* Each result is shifted down, masked with RANGE_MASK and used as an
     * index into range_limit; the table entry is the stored sample.
     * NOTE(review): the table comes from IDCT_range_limit(cinfo) and its
     * contents are not visible here -- presumably it clamps to the legal
     * sample range; confirm where the table is built.
     */
535df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
536df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
537df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
538df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
539df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
540df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
541df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
542df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
543df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
544df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
545df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
546df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
547df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
548df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
549df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
550df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
551df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
552df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
553df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
554df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
555df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
556df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
557df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
558df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 7;		/* advance pointer to next row */
559df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
560df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
561df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
562df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
563df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
564df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
565df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a reduced-size 6x6 output block.
566df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
567df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 3 multiplications in the 1-D kernel.
568df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/12).
569df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
570df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
571df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
572df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
573df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JCOEFPTR coef_block,
574df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JSAMPARRAY output_buf, JDIMENSION output_col)
575df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
576df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
577df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3;
578df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
579df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
580df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
581df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
582df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
583df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
584df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[6*6];	/* buffers data between passes */
585df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
586df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
587df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
588df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
589df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
590df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
591df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
592df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
593df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
594df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
595df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
596df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 <<= CONST_BITS;
597df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
598df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
599df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
600df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
601df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = tmp0 + tmp10;
602df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
603df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
604df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
605df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp1 + tmp0;
606df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = tmp1 - tmp0;
607df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
608df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
609df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
610df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
611df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
612df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
613df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
614df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
615df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
616df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
617df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
618df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
619df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
620df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
621df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
622df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[6*1] = (int) (tmp11 + tmp1);
623df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[6*4] = (int) (tmp11 - tmp1);
624df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
625df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
626df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
627df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
628df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 6 rows from work array, store into output array. */
629df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
630df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
631df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 6; ctr++) {
632df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
633df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
634df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
635df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
636df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
637df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
638df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 <<= CONST_BITS;
639df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = (INT32) wsptr[4];
640df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
641df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = tmp0 + tmp10;
642df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp0 - tmp10 - tmp10;
643df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = (INT32) wsptr[2];
644df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
645df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp1 + tmp0;
646df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = tmp1 - tmp0;
647df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
648df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
649df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
650df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
651df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
652df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
653df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
654df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
655df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
656df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = (z1 - z2 - z3) << CONST_BITS;
657df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
658df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
659df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
660df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
661df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
662df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
663df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
664df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
665df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
666df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
667df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
668df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
669df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
670df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
671df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
672df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
673df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
674df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
675df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
676df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
677df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
678df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
679df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 6;		/* advance pointer to next row */
680df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
681df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
682df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
683df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
684df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
685df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
686df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a reduced-size 5x5 output block.
687df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
688df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 5 multiplications in the 1-D kernel.
689df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/10).
690df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
691df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
692df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
693df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
694df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JCOEFPTR coef_block,
695df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JSAMPARRAY output_buf, JDIMENSION output_col)
696df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
697df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
698df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3;
699df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
700df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
701df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
702df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
703df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
704df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
705df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[5*5];	/* buffers data between passes */
706df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
707df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
708df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
709df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
710df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
711df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
712df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
713df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
714df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
715df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
716df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
717df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 <<= CONST_BITS;
718df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
719df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
720df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
721df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
722df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
723df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
724df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = tmp12 + z2;
725df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z3 + z1;
726df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 - z1;
727df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 -= z2 << 2;
728df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
729df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
730df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
731df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
732df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
733df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
734df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
735df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
736df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
737df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
738df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
739df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
740df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
741df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
742df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
743df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
744df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
745df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
746df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
747df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 5 rows from work array, store into output array. */
748df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
749df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
750df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 5; ctr++) {
751df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
752df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
753df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
754df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
755df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
756df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
757df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 <<= CONST_BITS;
758df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = (INT32) wsptr[2];
759df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = (INT32) wsptr[4];
760df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
761df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
762df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = tmp12 + z2;
763df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z3 + z1;
764df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 - z1;
765df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 -= z2 << 2;
766df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
767df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
768df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
769df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[1];
770df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[3];
771df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
772df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
773df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
774df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
775df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
776df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
777df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
778df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
779df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
780df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
781df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
782df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
783df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
784df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
785df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
786df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
787df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
788df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
789df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
790df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
791df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
792df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
793df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
794df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 5;		/* advance pointer to next row */
795df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
796df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
797df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
798df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
799df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
800df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
801df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a reduced-size 3x3 output block.
802df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
803df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 2 multiplications in the 1-D kernel.
804df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/6).
805df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
806df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
807df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
808df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
809df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JCOEFPTR coef_block,
810df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JSAMPARRAY output_buf, JDIMENSION output_col)
811df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
812df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp0, tmp2, tmp10, tmp12;
813df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
814df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
815df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
816df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
817df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
818df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
819df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[3*3];	/* buffers data between passes */
820df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
821df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
822df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
823df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
824df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
825df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
826df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
827df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
828df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
829df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
830df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
831df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 <<= CONST_BITS;
832df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
833df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
834df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
835df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
836df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp0 + tmp12;
837df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = tmp0 - tmp12 - tmp12;
838df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
839df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
840df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
841df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
842df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
843df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
844df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
845df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
846df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
847df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
848df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
849df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
850df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
851df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 3 rows from work array, store into output array. */
852df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
853df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
854df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 3; ctr++) {
855df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
856df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
857df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
858df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
859df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
860df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
861df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 <<= CONST_BITS;
862df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = (INT32) wsptr[2];
863df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
864df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp0 + tmp12;
865df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = tmp0 - tmp12 - tmp12;
866df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
867df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
868df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
869df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = (INT32) wsptr[1];
870df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
871df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
872df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
873df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
874df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
875df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
876df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
877df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
878df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
879df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
880df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
881df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
882df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
883df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
884df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 3;		/* advance pointer to next row */
885df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
886df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
887df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
888df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
889df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
890df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
891df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 9x9 output block.
892df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
893df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 10 multiplications in the 1-D kernel.
894df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/18).
895df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
896df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
897df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
898df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
899df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JCOEFPTR coef_block,
900df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	       JSAMPARRAY output_buf, JDIMENSION output_col)
901df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
902df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
903df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3, z4;
904df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
905df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
906df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
907df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
908df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
909df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
910df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[8*9];	/* buffers data between passes */
911df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
912df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
913df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
914df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
915df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
916df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
917df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
918df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
919df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
920df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
921df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
922df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 <<= CONST_BITS;
923df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
924df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
925df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
926df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
927df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
928df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
929df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
930df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
931df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = tmp0 + tmp3;
932df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = tmp0 - tmp3 - tmp3;
933df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
934df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
935df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp2 + tmp0;
936df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = tmp2 - tmp0 - tmp0;
937df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
938df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
939df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
940df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
941df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
942df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp1 + tmp0 - tmp3;
943df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = tmp1 - tmp0 + tmp2;
944df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = tmp1 - tmp2 + tmp3;
945df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
946df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
947df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
948df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
949df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
950df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
951df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
952df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
953df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
954df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
955df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
956df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
957df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = tmp2 + tmp3 - z2;
958df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
959df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 += z2 - tmp1;
960df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 += z2 + tmp1;
961df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
962df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
963df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
964df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
965df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
966df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
967df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
968df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
969df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
970df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
971df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
972df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
973df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
974df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
975df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
976df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 9 rows from work array, store into output array. */
977df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
978df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
979df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 9; ctr++) {
980df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
981df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
982df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
983df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
984df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
985df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
986df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 <<= CONST_BITS;
987df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
988df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[2];
989df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[4];
990df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[6];
991df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
992df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
993df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = tmp0 + tmp3;
994df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = tmp0 - tmp3 - tmp3;
995df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
996df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
997df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp2 + tmp0;
998df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = tmp2 - tmp0 - tmp0;
999df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1000df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1001df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
1002df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
1003df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1004df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp1 + tmp0 - tmp3;
1005df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = tmp1 - tmp0 + tmp2;
1006df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = tmp1 - tmp2 + tmp3;
1007df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1008df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
1009df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1010df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
1011df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
1012df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
1013df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[7];
1014df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1015df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
1016df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1017df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
1018df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
1019df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = tmp2 + tmp3 - z2;
1020df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
1021df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 += z2 - tmp1;
1022df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 += z2 + tmp1;
1023df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1024df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1025df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
1026df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1027df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1028df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1029df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1030df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1031df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1032df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1033df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1034df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1035df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1036df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1037df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1038df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1039df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1040df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1041df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1042df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1043df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1044df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1045df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1046df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1047df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1048df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1049df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1050df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1051df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1052df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1053df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1054df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1055df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 8;		/* advance pointer to next row */
1056df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1057df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
1058df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1059df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1060df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
1061df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
1062df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 10x10 output block.
1063df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
1064df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 12 multiplications in the 1-D kernel.
1065df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/20).
1066df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
1067df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1068df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
1069df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1070df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JCOEFPTR coef_block,
1071df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JSAMPARRAY output_buf, JDIMENSION output_col)
1072df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
1073df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1074df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1075df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3, z4, z5;
1076df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
1077df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
1078df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
1079df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
1080df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1081df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
1082df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[8*10];	/* buffers data between passes */
1083df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
1084df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1085df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
1086df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1087df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
1088df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1089df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
1090df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1091df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
1092df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1093df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1094df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 <<= CONST_BITS;
1095df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
1096df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1097df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1098df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
1099df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
1100df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z3 + z1;
1101df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 - z2;
1102df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1103df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
1104df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			CONST_BITS-PASS1_BITS);
1105df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1106df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1107df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1108df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1109df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
1110df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1111df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1112df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1113df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp10 + tmp12;
1114df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = tmp10 - tmp12;
1115df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp11 + tmp13;
1116df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = tmp11 - tmp13;
1117df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1118df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
1119df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1120df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1121df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1122df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1123df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1124df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1125df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z2 + z4;
1126df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z2 - z4;
1127df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1128df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
1129df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z5 = z3 << CONST_BITS;
1130df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1131df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
1132df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = z5 + tmp12;
1133df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1134df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1135df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1136df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1137df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
1138df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1139df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1140df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1141df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1142df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1143df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1144df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1145df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
1146df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1147df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1148df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1149df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1150df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1151df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*2] = (int) (tmp22 + tmp12);
1152df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*7] = (int) (tmp22 - tmp12);
1153df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1154df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1155df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1156df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1157df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1158df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1159df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 10 rows from work array, store into output array. */
1160df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1161df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
1162df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 10; ctr++) {
1163df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
1164df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1165df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
1166df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1167df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
1168df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1169df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 <<= CONST_BITS;
1170df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[4];
1171df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
1172df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
1173df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z3 + z1;
1174df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 - z2;
1175df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1176df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
1177df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1178df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[2];
1179df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[6];
1180df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1181df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
1182df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1183df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1184df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1185df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp10 + tmp12;
1186df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = tmp10 - tmp12;
1187df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp11 + tmp13;
1188df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = tmp11 - tmp13;
1189df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1190df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
1191df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1192df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
1193df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
1194df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
1195df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 <<= CONST_BITS;
1196df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[7];
1197df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1198df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z2 + z4;
1199df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z2 - z4;
1200df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1201df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
1202df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1203df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
1204df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = z3 + tmp12;
1205df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1206df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1207df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1208df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1209df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
1210df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1211df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1212df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1213df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1214df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1215df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1216df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1217df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
1218df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1219df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1220df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1221df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1222df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1223df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1224df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1225df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1226df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1227df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1228df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1229df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1230df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1231df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1232df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1233df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1234df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1235df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1236df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1237df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1238df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1239df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1240df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1241df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1242df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1243df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1244df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1245df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1246df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1247df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					      CONST_BITS+PASS1_BITS+3)
1248df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			    & RANGE_MASK];
1249df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1250df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 8;		/* advance pointer to next row */
1251df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1252df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
1253df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1254df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1255df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
1256df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
1257df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 11x11 output block.
1258df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
1259df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 24 multiplications in the 1-D kernel.
1260df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/22).
1261df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
1262df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1263df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
1264df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1265df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JCOEFPTR coef_block,
1266df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JSAMPARRAY output_buf, JDIMENSION output_col)
1267df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
1268df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1269df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1270df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3, z4;
1271df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
1272df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
1273df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
1274df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
1275df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1276df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
1277df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[8*11];	/* buffers data between passes */
1278df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
1279df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1280df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
1281df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1282df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
1283df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1284df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
1285df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1286df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
1287df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1288df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1289df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 <<= CONST_BITS;
1290df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
1291df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1292df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1293df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1294df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1295df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1296df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1297df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
1298df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
1299df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = z1 + z3;
1300df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
1301df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 -= z2;
1302df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
1303df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp20 + tmp23 + tmp25 -
1304df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
1305df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1306df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1307df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 += tmp25;
1308df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
1309df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
1310df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
1311df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
1312df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1313df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
1314df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1315df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1316df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1317df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1318df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1319df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1320df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z1 + z2;
1321df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1322df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
1323df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
1324df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1325df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp11 + tmp12 + tmp13 -
1326df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
1327df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1328df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
1329df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
1330df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
1331df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z1;
1332df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
1333df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
1334df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
1335df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
1336df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1337df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
1338df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1339df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1340df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1341df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1342df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1343df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1344df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1345df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1346df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1347df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1348df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1349df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1350df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1351df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1352df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 11 rows from work array, store into output array. */
1353df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1354df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
1355df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 11; ctr++) {
1356df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
1357df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1358df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
1359df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1360df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
1361df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1362df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 <<= CONST_BITS;
1363df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1364df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[2];
1365df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[4];
1366df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[6];
1367df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1368df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
1369df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
1370df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = z1 + z3;
1371df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
1372df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 -= z2;
1373df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
1374df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp20 + tmp23 + tmp25 -
1375df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
1376df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1377df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1378df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 += tmp25;
1379df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
1380df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
1381df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
1382df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
1383df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1384df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
1385df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1386df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
1387df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
1388df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
1389df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[7];
1390df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1391df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z1 + z2;
1392df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1393df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
1394df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
1395df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1396df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp11 + tmp12 + tmp13 -
1397df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
1398df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1399df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
1400df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
1401df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
1402df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z1;
1403df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
1404df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
1405df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
1406df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
1407df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1408df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
1409df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1410df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1411df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1412df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1413df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1414df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1415df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1416df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1417df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1418df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1419df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1420df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1421df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1422df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1423df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1424df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1425df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1426df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1427df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1428df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1429df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1430df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1431df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1432df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1433df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1434df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1435df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1436df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1437df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1438df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1439df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1440df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
1441df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1442df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1443df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1444df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 8;		/* advance pointer to next row */
1445df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1446df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
1447df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1448df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1449df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
1450df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
1451df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 12x12 output block.
1452df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
1453df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 15 multiplications in the 1-D kernel.
1454df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/24).
 *
 * Parameters:
 *   cinfo       decompressor state; used here only to obtain the sample
 *               range-limit table through IDCT_range_limit().
 *   compptr     component info; compptr->dct_table is read as an array of
 *               ISLOW_MULT_TYPE values that are handed to DEQUANTIZE().
 *   coef_block  input coefficients; rows 0..7 of 8 columns are read
 *               (index DCTSIZE*row + column).
 *   output_buf  output sample rows; rows 0..11 are written.
 *   output_col  offset added to each row pointer; 12 samples are written
 *               per row starting at that offset.
 *
 * Pass 1 expands each of the 8 input columns to 12 values held in an
 * 8-wide, 12-row workspace; pass 2 expands each of the 12 workspace rows
 * to 12 output samples.
1455df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
1456df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1457df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
1458df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1459df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JCOEFPTR coef_block,
1460df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JSAMPARRAY output_buf, JDIMENSION output_col)
1461df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
  /* tmp1x hold odd-part terms, tmp2x hold even-part terms of the current
   * 1-D transform; z1..z4 hold inputs and are reused as scratch.
   */
1462df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1463df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1464df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3, z4;
1465df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
1466df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
1467df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
1468df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
1469df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1470df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
1471df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[8*12];	/* buffers data between passes */
1472df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
1473df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1474df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
  /* Each iteration reads input rows 0..7 of one column and writes 12 values
   * into the matching workspace column (row stride 8).  The output stage
   * shifts right by CONST_BITS-PASS1_BITS only, so the DC term (shifted left
   * by CONST_BITS below) stays scaled up by 2^PASS1_BITS; the FIX()
   * constants presumably carry the same CONST_BITS scale -- confirm against
   * the macro definitions.
   */
1475df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1476df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
1477df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1478df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
1479df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1480df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
    /* NOTE(review): the "<<= CONST_BITS" shifts in this function act on
     * signed values that can presumably be negative; ISO C leaves that
     * undefined -- confirm the supported compilers treat it as a multiply
     * by 2^CONST_BITS.
     */
1481df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1482df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1483df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 <<= CONST_BITS;
1484df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
    /* The constant is half of 2^(CONST_BITS-PASS1_BITS); z3 feeds every
     * tmp2x term below, so the bias reaches all 12 outputs of this column.
     */
1485df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1486df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1487df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1488df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1489df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1490df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z3 + z4;
1491df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 - z4;
1492df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* z4 is reused: from here it holds row2 * c2, while z1 and z2 hold
     * rows 2 and 6 shifted left by CONST_BITS.
     */
1493df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1494df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1495df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 <<= CONST_BITS;
1496df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1497df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 <<= CONST_BITS;
1498df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* tmp12 is a scratch value recomputed three times below; each value is
     * consumed by the pair of tmp2x assignments that follows it.
     */
1499df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z1 - z2;
1500df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1501df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = z3 + tmp12;
1502df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = z3 - tmp12;
1503df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1504df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z4 + z2;
1505df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1506df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp10 + tmp12;
1507df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp10 - tmp12;
1508df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1509df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z4 - z1 - z2;
1510df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1511df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = tmp11 + tmp12;
1512df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = tmp11 - tmp12;
1513df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1514df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
    /* tmp10..tmp15 are overwritten here with the six odd-part terms. */
1515df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1516df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1517df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1518df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1519df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1520df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* tmp11 and tmp14 are temporaries here (row3 * c3 and row3 * -c9); both
     * are reassigned to their final values at the end of the odd part.
     */
1521df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
1522df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
1523df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1524df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z1 + z3;
1525df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
1526df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
1527df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
1528df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
1529df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1530df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1531df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
1532df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
1533df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* z1..z3 become scratch: z1 = row1 - row7, z2 = row3 - row5, and z3 is
     * the c9 product shared by the final tmp11 and tmp14.
     */
1534df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 -= z4;
1535df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 -= z3;
1536df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
1537df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
1538df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
1539df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1540df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
    /* Workspace rows k and 11-k share the even term tmp2k and differ only
     * in the sign of the odd term tmp1k.
     */
1541df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1542df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1543df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1544df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1545df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1546df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1547df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1548df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1549df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1550df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1551df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1552df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1553df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1554df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1555df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1556df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 12 rows from work array, store into output array. */
  /* Each iteration consumes one 8-value workspace row and emits 12 samples
   * into output_buf[ctr] starting at output_col.  The arithmetic mirrors
   * pass 1 with workspace values in place of dequantized coefficients; only
   * the rounding constant and the final shift differ.
   */
1557df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1558df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
1559df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 12; ctr++) {
1560df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
1561df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1562df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
1563df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1564df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
    /* After the shift by CONST_BITS the added constant equals
     * 2^(CONST_BITS+PASS1_BITS+2), half of the final divisor.
     */
1565df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1566df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 <<= CONST_BITS;
1567df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1568df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[4];
1569df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1570df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1571df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z3 + z4;
1572df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 - z4;
1573df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1574df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[2];
1575df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1576df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 <<= CONST_BITS;
1577df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[6];
1578df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 <<= CONST_BITS;
1579df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1580df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z1 - z2;
1581df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1582df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = z3 + tmp12;
1583df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = z3 - tmp12;
1584df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1585df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z4 + z2;
1586df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1587df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp10 + tmp12;
1588df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp10 - tmp12;
1589df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1590df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z4 - z1 - z2;
1591df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1592df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = tmp11 + tmp12;
1593df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = tmp11 - tmp12;
1594df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1595df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
1596df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1597df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
1598df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
1599df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
1600df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[7];
1601df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1602df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
1603df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
1604df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1605df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z1 + z3;
1606df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
1607df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
1608df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
1609df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
1610df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1611df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1612df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
1613df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
1614df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1615df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 -= z4;
1616df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 -= z3;
1617df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
1618df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
1619df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
1620df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1621df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
    /* Samples k and 11-k are the even term plus/minus the odd term.  Each
     * sum is shifted right by CONST_BITS+PASS1_BITS+3 (the extra 3 bits are
     * presumably the IDCT's 1/8 normalization -- confirm against the notes
     * at the top of the file), masked with RANGE_MASK, and used as an index
     * into range_limit to obtain the stored sample.
     */
1622df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1623df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1624df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1625df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1626df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1627df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1628df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1629df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1630df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1631df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1632df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1633df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1634df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1635df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1636df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1637df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1638df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1639df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1640df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1641df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1642df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1643df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1644df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1645df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1646df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1647df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1648df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1649df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1650df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1651df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1652df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1653df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1654df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1655df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1656df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1657df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1658df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1659df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1660df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 8;		/* advance pointer to next row */
1661df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1662df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
1663df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1664df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1665df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
1666df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
1667df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 13x13 output block.
1668df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
1669df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 29 multiplications in the 1-D kernel.
1670df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/26).
1671df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
1672df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1673df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
1674df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1675df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JCOEFPTR coef_block,
1676df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JSAMPARRAY output_buf, JDIMENSION output_col)
1677df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
  /*
   * Parameters, as they are used in this routine:
   *   cinfo      - only handed to IDCT_range_limit() to obtain the lookup
   *                table through which every output sample is produced.
   *   compptr    - compptr->dct_table supplies the per-coefficient
   *                multipliers that are passed to DEQUANTIZE.
   *   coef_block - input coefficients; for each of 8 columns, rows 0..7
   *                are read with a row stride of DCTSIZE.
   *   output_buf - array of output rows; rows 0..12 are written.
   *   output_col - offset of the first sample written within each row.
   *
   * Pass 1 expands every 8-entry input column into 13 values stored in
   * workspace (13 rows of 8 ints).  Pass 2 expands every 8-entry workspace
   * row into 13 output samples.
   */
1678df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1679df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1680df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3, z4;
1681df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
1682df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
1683df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
1684df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
1685df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1686df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
1687df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[8*13];	/* buffers data between passes */
1688df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
1689df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1690df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
1691df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1692df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
1693df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1694df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
  /* One input column per iteration: inptr, quantptr and wsptr each step by
   * one element, and rows are addressed with stride DCTSIZE (input and
   * multipliers) or 8 (workspace).
   */
1695df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1696df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
    /* Uses rows 0, 2, 4 and 6 of the current column. */
1697df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1698df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1699df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 <<= CONST_BITS;
1700df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
    /* The bias is half of 1 << (CONST_BITS-PASS1_BITS), the amount by which
     * the pass-1 output stage shifts right; presumably this makes that shift
     * round instead of truncate (RIGHT_SHIFT is defined elsewhere).
     */
1701df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1702df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1703df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1704df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1705df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1706df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1707df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z3 + z4;
1708df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 - z4;
1709df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* tmp12 and tmp13 are scratch values: they are recomputed three times
     * below, and each pair feeds two of the tmp2x even-part results.
     */
1710df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
1711df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
1712df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1713df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
1714df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
1715df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1716df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
1717df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
1718df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1719df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
1720df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1721df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* In this third pair tmp13 carries -z1; both users below subtract
     * tmp13, so z1 still enters tmp23 and tmp24 with a positive sign.
     */
1722df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
1723df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
1724df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1725df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1726df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1727df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1728df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
1729df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1730df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
    /* Uses rows 1, 3, 5 and 7.  z1..z4 are reloaded here, so the scaled
     * row-0 value from the even part is no longer held in z1.
     */
1731df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1732df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1733df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1734df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1735df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1736df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* tmp14 is a scratch product shared between two accumulators at a time;
     * tmp15 first holds the plain sum z1 + z4 and is later replaced by its
     * c11 multiple.
     */
1737df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
1738df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
1739df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = z1 + z4;
1740df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
1741df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp11 + tmp12 + tmp13 -
1742df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
1743df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
1744df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1745df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1746df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
1747df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += tmp14;
1748df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1749df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
1750df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp14;
1751df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += tmp14;
1752df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
1753df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1754df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
    /* From here on z1 is scratch: it holds a product shared by tmp14 and
     * tmp15, not the row-1 coefficient (which is not read again).
     */
1755df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
1756df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 += z1;
1757df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
1758df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
1759df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1760df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
    /* Workspace rows k and 12-k are tmp2k + tmp1k and tmp2k - tmp1k
     * (even part plus/minus odd part); the centre row 6 takes tmp26 alone.
     */
1761df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1762df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1763df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1764df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1765df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1766df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1767df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1768df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1769df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1770df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1771df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1772df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1773df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1774df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1775df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1776df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1777df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 13 rows from work array, store into output array. */
1778df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1779df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
  /* One workspace row (8 ints) per iteration yields one output row of 13
   * samples.  The arithmetic repeats pass 1 with workspace values taking
   * the place of the dequantized coefficients.
   */
1780df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 13; ctr++) {
1781df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
1782df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1783df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
1784df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1785df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
    /* The bias is added before the left shift, so it ends up as
     * ONE << (CONST_BITS+PASS1_BITS+2): half of 1 << (CONST_BITS+PASS1_BITS+3),
     * the amount by which the final output stage shifts right.
     */
1786df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1787df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 <<= CONST_BITS;
1788df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1789df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[2];
1790df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[4];
1791df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[6];
1792df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1793df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z3 + z4;
1794df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z3 - z4;
1795df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* tmp12 and tmp13 are again scratch, recomputed for each pair of
     * tmp2x results.
     */
1796df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
1797df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
1798df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1799df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
1800df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
1801df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1802df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
1803df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
1804df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1805df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
1806df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1807df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* tmp13 carries -z1 in this pair and is subtracted by both users. */
1808df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
1809df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
1810df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1811df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1812df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1813df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1814df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
1815df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1816df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
1817df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1818df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
1819df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
1820df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
1821df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[7];
1822df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1823df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
1824df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
1825df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = z1 + z4;
1826df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
1827df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp11 + tmp12 + tmp13 -
1828df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
1829df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
1830df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1831df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1832df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
1833df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += tmp14;
1834df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1835df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
1836df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp14;
1837df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += tmp14;
1838df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
1839df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1840df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
    /* z1 becomes a scratch product here; wsptr[1] is not needed again. */
1841df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
1842df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 += z1;
1843df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
1844df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
1845df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1846df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
    /* Samples k and 12-k are tmp2k + tmp1k and tmp2k - tmp1k; sample 6 is
     * tmp26 alone.  Each shifted value is masked with RANGE_MASK and used
     * as an index into range_limit to obtain the stored sample.
     */
1847df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1848df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1849df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1850df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1851df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1852df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1853df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1854df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1855df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1856df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1857df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1858df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1859df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1860df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1861df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1862df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1863df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1864df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1865df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1866df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1867df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1868df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1869df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1870df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1871df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1872df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1873df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1874df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1875df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1876df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1877df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1878df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1879df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1880df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1881df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1882df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1883df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1884df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
1885df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
1886df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
1887df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1888df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 8;		/* advance pointer to next row */
1889df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
1890df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
1891df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1892df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1893df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
1894df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
1895df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 14x14 output block.
1896df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
1897df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 20 multiplications in the 1-D kernel.
1898df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/28).
1899df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
1900df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1901df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
1902df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1903df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JCOEFPTR coef_block,
1904df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JSAMPARRAY output_buf, JDIMENSION output_col)
1905df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
1906df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1907df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1908df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3, z4;
1909df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
1910df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
1911df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
1912df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
1913df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1914df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
1915df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[8*14];	/* buffers data between passes */
1916df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
1917df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1918df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
1919df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1920df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
1921df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1922df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
1923df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1924df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
1925df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1926df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1927df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 <<= CONST_BITS;
1928df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
1929df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1930df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1931df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
1932df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
1933df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
1934df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1935df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z1 + z2;
1936df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z1 + z3;
1937df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z1 - z4;
1938df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1939df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
1940df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			CONST_BITS-PASS1_BITS);
1941df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1942df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1943df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1944df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1945df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
1946df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1947df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
1948df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
1949df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
1950df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
1951df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1952df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp10 + tmp13;
1953df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp26 = tmp10 - tmp13;
1954df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp11 + tmp14;
1955df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp11 - tmp14;
1956df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = tmp12 + tmp15;
1957df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = tmp12 - tmp15;
1958df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1959df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
1960df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1961df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1962df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1963df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1964df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1965df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z4 << CONST_BITS;
1966df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1967df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = z1 + z3;
1968df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
1969df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
1970df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
1971df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
1972df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
1973df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    -= z2;
1974df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
1975df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp16 += tmp15;
1976df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    += z4;
1977df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
1978df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
1979df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
1980df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
1981df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
1982df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
1983df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1984df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = (z1 - z3) << PASS1_BITS;
1985df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1986df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
1987df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
1988df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1989df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1990df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1991df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1992df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1993df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1994df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*3]  = (int) (tmp23 + tmp13);
1995df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*10] = (int) (tmp23 - tmp13);
1996df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1997df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1998df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1999df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2000df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2001df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2002df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
2003df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2004df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 14 rows from work array, store into output array. */
2005df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2006df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
2007df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 14; ctr++) {
2008df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
2009df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2010df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
2011df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2012df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
2013df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2014df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 <<= CONST_BITS;
2015df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[4];
2016df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
2017df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
2018df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
2019df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2020df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = z1 + z2;
2021df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z1 + z3;
2022df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z1 - z4;
2023df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2024df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
2025df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2026df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[2];
2027df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[6];
2028df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2029df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
2030df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2031df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2032df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2033df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
2034df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
2035df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2036df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp10 + tmp13;
2037df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp26 = tmp10 - tmp13;
2038df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp11 + tmp14;
2039df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp11 - tmp14;
2040df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = tmp12 + tmp15;
2041df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = tmp12 - tmp15;
2042df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2043df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
2044df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2045df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
2046df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
2047df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
2048df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[7];
2049df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 <<= CONST_BITS;
2050df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2051df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = z1 + z3;
2052df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
2053df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
2054df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2055df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
2056df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
2057df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    -= z2;
2058df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
2059df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp16 += tmp15;
2060df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
2061df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
2062df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
2063df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
2064df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2065df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
2066df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2067df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2068df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2069df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
2070df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2071df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2072df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2073df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2074df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2075df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2076df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2077df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2078df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2079df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2080df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2081df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2082df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2083df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2084df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2085df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2086df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2087df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2088df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2089df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2090df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2091df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2092df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2093df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2094df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2095df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2096df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2097df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2098df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2099df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2100df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2101df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2102df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2103df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2104df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2105df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2106df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2107df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2108df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2109df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2110df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2111df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2112df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2113df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2114df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 8;		/* advance pointer to next row */
2115df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
2116df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
2117df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2118df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2119df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
2120df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
2121df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 15x15 output block.
2122df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
2123df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 22 multiplications in the 1-D kernel.
2124df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/30).
2125df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
2126df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2127df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
2128df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2129df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JCOEFPTR coef_block,
2130df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JSAMPARRAY output_buf, JDIMENSION output_col)
2131df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
2132df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2133df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2134df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3, z4;
2135df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
2136df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
2137df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
2138df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
2139df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2140df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
2141df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[8*15];	/* buffers data between passes */
2142df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
2143df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2144df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
2145df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2146df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
2147df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2148df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
2149df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2150df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
2151df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2152df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2153df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 <<= CONST_BITS;
2154df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
2155df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2156df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2157df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2158df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2159df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2160df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2161df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2162df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2163df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2164df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z1 - tmp10;
2165df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z1 + tmp11;
2166df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
2167df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2168df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = z2 - z3;
2169df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 += z2;
2170df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2171df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2172df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
2173df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2174df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp13 + tmp10 + tmp11;
2175df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = tmp12 - tmp10 + tmp11 + z2;
2176df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2177df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2178df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2179df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2180df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp13 - tmp10 - tmp11;
2181df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp26 = tmp12 + tmp10 - tmp11 - z2;
2182df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2183df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2184df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2185df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2186df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp12 + tmp10 + tmp11;
2187df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = tmp13 - tmp10 + tmp11;
2188df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += tmp11;
2189df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
2190df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
2191df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2192df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
2193df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2194df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2195df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2196df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2197df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
2198df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2199df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2200df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z2 - z4;
2201df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
2202df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
2203df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
2204df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2205df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
2206df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
2207df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = z1 - z4;
2208df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
2209df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2210df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2211df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2212df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
2213df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
2214df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
2215df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
2216df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2217df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
2218df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2219df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2220df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2221df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2222df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2223df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2224df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2225df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2226df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2227df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2228df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2229df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2230df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2231df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2232df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2233df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2234df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
2235df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2236df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 15 rows from work array, store into output array. */
2237df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2238df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
2239df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 15; ctr++) {
2240df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
2241df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2242df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
2243df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2244df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
2245df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2246df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 <<= CONST_BITS;
2247df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2248df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[2];
2249df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[4];
2250df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[6];
2251df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2252df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2253df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2254df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2255df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z1 - tmp10;
2256df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z1 + tmp11;
2257df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
2258df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2259df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = z2 - z3;
2260df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 += z2;
2261df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2262df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2263df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
2264df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2265df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp13 + tmp10 + tmp11;
2266df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = tmp12 - tmp10 + tmp11 + z2;
2267df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2268df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2269df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2270df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2271df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp13 - tmp10 - tmp11;
2272df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp26 = tmp12 + tmp10 - tmp11 - z2;
2273df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2274df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2275df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2276df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2277df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp12 + tmp10 + tmp11;
2278df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = tmp13 - tmp10 + tmp11;
2279df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += tmp11;
2280df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
2281df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
2282df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2283df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
2284df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2285df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
2286df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
2287df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[5];
2288df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
2289df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[7];
2290df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2291df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = z2 - z4;
2292df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
2293df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
2294df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
2295df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2296df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
2297df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
2298df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = z1 - z4;
2299df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
2300df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2301df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2302df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2303df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
2304df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
2305df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
2306df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
2307df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2308df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
2309df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2310df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2311df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2312df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2313df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2314df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2315df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2316df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2317df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2318df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2319df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2320df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2321df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2322df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2323df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2324df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2325df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2326df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2327df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2328df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2329df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2330df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2331df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2332df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2333df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2334df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2335df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2336df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2337df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2338df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2339df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2340df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2341df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2342df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2343df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2344df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2345df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2346df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2347df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2348df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2349df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2350df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2351df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2352df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
2353df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2354df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2355df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2356df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 8;		/* advance pointer to next row */
2357df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
2358df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
2359df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2360df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2361df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org/*
2362df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Perform dequantization and inverse DCT on one block of coefficients,
2363df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * producing a 16x16 output block.
2364df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org *
2365df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * Optimized algorithm with 28 multiplications in the 1-D kernel.
2366df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org * cK represents sqrt(2) * cos(K*pi/32).
 *
 * Each 1-D pass takes 8 inputs (indices 0..7) and produces 16 outputs.
 * Pass 1 reads 8 columns from coef_block and stores 16 intermediate values
 * per column in a local workspace of 8*16 ints.  Pass 2 reads the 16
 * workspace rows (8 values each) and writes 16 samples per output row.
 *
 * Parameters:
 *   cinfo       used here only to obtain the range-limit lookup table
 *               via IDCT_range_limit().
 *   compptr     its dct_table member is read as an array of
 *               ISLOW_MULT_TYPE multipliers handed to DEQUANTIZE().
 *   coef_block  input coefficients, addressed as inptr[DCTSIZE*row] for
 *               rows 0..7 of each of the 8 columns processed.
 *   output_buf  output rows; rows 0..15 are written, 16 samples each.
 *   output_col  offset added to every output row pointer before writing.
 *
 * Nothing is returned; the only results are the samples stored through
 * output_buf.
2367df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org */
2368df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2369df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgGLOBAL(void)
2370df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.orgjpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2371df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JCOEFPTR coef_block,
2372df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org		 JSAMPARRAY output_buf, JDIMENSION output_col)
2373df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org{
2374df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2375df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2376df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  INT32 z1, z2, z3, z4;
2377df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JCOEFPTR inptr;
2378df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  ISLOW_MULT_TYPE * quantptr;
2379df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int * wsptr;
2380df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPROW outptr;
2381df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2382df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int ctr;
2383df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  int workspace[8*16];	/* buffers data between passes */
  /* SHIFT_TEMPS is defined outside this chunk; presumably it declares any
   * temporary that RIGHT_SHIFT needs -- TODO confirm against its definition.
   */
2384df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  SHIFT_TEMPS
2385df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2386df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 1: process columns from input, store into work array. */
2387df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2388df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  inptr = coef_block;
2389df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2390df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
  /* One iteration per input column.  inptr, quantptr and wsptr each advance
   * by one element, so successive rows of a column are DCTSIZE apart in the
   * input/quantization tables and 8 apart in the workspace.
   */
2391df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2392df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
2393df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2394df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
    /* NOTE(review): tmp0 is a signed INT32 and the dequantized row-0 term
     * can presumably be negative; left-shifting a negative signed value is
     * undefined in ISO C.  Confirm this is acceptable on supported compilers.
     */
2395df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 <<= CONST_BITS;
2396df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
    /* The value added is half of the 2^(CONST_BITS-PASS1_BITS) divisor used
     * by the RIGHT_SHIFTs at the end of this pass, so those shifts round
     * rather than truncate (assuming RIGHT_SHIFT is an arithmetic shift).
     * tmp0 feeds tmp10..tmp13 and through them tmp20..tmp27, so the term
     * reaches all 16 outputs of the column.
     * NOTE(review): pass 2 spells its fudge factor with ONE, this line uses
     * a plain int 1; equivalent only while the shift count fits in an int --
     * confirm against the definitions of CONST_BITS and PASS1_BITS.
     */
2397df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
2398df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2399df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2400df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
2401df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
2402df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2403df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp0 + tmp1;
2404df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp0 - tmp1;
2405df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = tmp0 + tmp2;
2406df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = tmp0 - tmp2;
2407df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2408df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2409df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2410df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = z1 - z2;
2411df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
2412df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
2413df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2414df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
2415df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
2416df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2417df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2418df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* tmp20..tmp27 hold the eight even-part terms.  In the output stage each
     * one is combined with an odd-part term as a sum (output k) and as a
     * difference (output 15-k).
     */
2419df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp10 + tmp0;
2420df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp27 = tmp10 - tmp0;
2421df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp12 + tmp1;
2422df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp26 = tmp12 - tmp1;
2423df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = tmp13 + tmp2;
2424df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp13 - tmp2;
2425df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = tmp11 + tmp3;
2426df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = tmp11 - tmp3;
2427df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2428df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
    /* The odd-part results are accumulated in tmp0..tmp3 and tmp10..tmp13.
     * Those names are reused from the even part, whose results are already
     * folded into tmp20..tmp27.  z1 and z2 are also overwritten with
     * intermediate products part-way through (z1 from the c15 product on,
     * z2 from "z2 += z4" on), so the statement order below is significant.
     */
2429df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2430df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2431df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2432df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2433df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2434df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
    /* tmp11 temporarily holds z1 + z3; it is multiplied by both c5 and c11
     * below before being replaced by the c11 product.
     */
2435df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z1 + z3;
2436df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2437df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
2438df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
2439df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
2440df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
2441df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
2442df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
2443df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0  = tmp1 + tmp2 + tmp3 -
2444df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
2445df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = tmp10 + tmp11 + tmp12 -
2446df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
2447df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
2448df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
2449df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
2450df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
2451df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
2452df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
2453df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2    += z4;
2454df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
2455df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1  += z1;
2456df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
2457df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
2458df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
2459df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += z2;
2460df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2461df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2  += z2;
2462df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3  += z2;
2463df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
2464df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 += z2;
2465df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z2;
2466df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2467df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
    /* Workspace rows k and 15-k receive (even + odd) and (even - odd).
     * The shift is CONST_BITS-PASS1_BITS rather than CONST_BITS, so the
     * stored values keep PASS1_BITS extra bits of scale, which the larger
     * shift at the end of pass 2 removes.
     */
2468df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2469df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
2470df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
2471df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
2472df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
2473df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
2474df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
2475df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
2476df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
2477df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2478df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2479df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2480df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2481df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2482df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2483df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2484df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2485df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
2486df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2487df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  /* Pass 2: process 16 rows from work array, store into output array. */
2488df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2489df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  wsptr = workspace;
  /* One iteration per output row.  Each iteration consumes one 8-element
   * workspace row (wsptr advances by 8 at the bottom of the loop) and
   * writes 16 samples starting at output_buf[ctr] + output_col.  The
   * arithmetic is the same 1-D kernel as pass 1, minus the dequantization.
   */
2490df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  for (ctr = 0; ctr < 16; ctr++) {
2491df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr = output_buf[ctr] + output_col;
2492df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2493df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Even part */
2494df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2495df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Add fudge factor here for final descale. */
    /* ONE << (PASS1_BITS+2), once shifted left by CONST_BITS on the next
     * statement, equals half of the 2^(CONST_BITS+PASS1_BITS+3) divisor
     * applied by the final RIGHT_SHIFTs of this pass.
     * NOTE(review): as in pass 1, the following left shift operates on a
     * signed value that can presumably be negative -- confirm.
     */
2496df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2497df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 <<= CONST_BITS;
2498df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2499df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[4];
2500df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
2501df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
2502df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2503df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = tmp0 + tmp1;
2504df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = tmp0 - tmp1;
2505df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = tmp0 + tmp2;
2506df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = tmp0 - tmp2;
2507df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2508df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[2];
2509df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[6];
2510df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = z1 - z2;
2511df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
2512df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
2513df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2514df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
2515df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
2516df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2517df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2518df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2519df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp20 = tmp10 + tmp0;
2520df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp27 = tmp10 - tmp0;
2521df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp21 = tmp12 + tmp1;
2522df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp26 = tmp12 - tmp1;
2523df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp22 = tmp13 + tmp2;
2524df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp25 = tmp13 - tmp2;
2525df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp23 = tmp11 + tmp3;
2526df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp24 = tmp11 - tmp3;
2527df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2528df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Odd part */
    /* Same sequence as the pass 1 odd part, with workspace values as inputs;
     * tmp0..tmp3, tmp10..tmp13, z1 and z2 are reused in the same
     * order-dependent way.
     */
2529df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2530df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1 = (INT32) wsptr[1];
2531df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2 = (INT32) wsptr[3];
2532df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z3 = (INT32) wsptr[5];
2533df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z4 = (INT32) wsptr[7];
2534df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2535df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = z1 + z3;
2536df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2537df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
2538df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
2539df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
2540df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
2541df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
2542df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
2543df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp0  = tmp1 + tmp2 + tmp3 -
2544df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
2545df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp13 = tmp10 + tmp11 + tmp12 -
2546df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
2547df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
2548df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
2549df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
2550df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
2551df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
2552df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
2553df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2    += z4;
2554df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
2555df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp1  += z1;
2556df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
2557df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
2558df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
2559df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp12 += z2;
2560df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2561df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp2  += z2;
2562df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp3  += z2;
2563df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
2564df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp10 += z2;
2565df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    tmp11 += z2;
2566df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2567df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    /* Final output stage */
    /* Samples k and 15-k are (even + odd) and (even - odd).  Each value is
     * shifted right by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK, and
     * used as an index into range_limit; the table entry is the stored
     * sample.  The table itself is built outside this chunk -- presumably it
     * clamps to the valid sample range; verify against IDCT_range_limit.
     */
2568df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2569df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2570df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2571df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2572df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2573df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2574df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2575df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2576df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2577df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2578df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2579df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2580df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2581df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2582df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2583df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2584df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2585df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2586df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2587df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2588df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2589df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2590df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2591df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2592df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2593df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2594df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2595df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2596df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2597df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2598df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2599df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2600df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2601df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2602df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2603df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2604df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2605df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2606df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2607df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2608df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2609df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2610df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2611df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2612df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2613df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2614df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2615df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org					       CONST_BITS+PASS1_BITS+3)
2616df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org			     & RANGE_MASK];
2617df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2618df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org    wsptr += 8;		/* advance pointer to next row */
2619df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org  }
2620df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org}
2621df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org
2622df5ffdd8b73415b78055177148995bdbdf38b12ehbono@chromium.org#endif /* IDCT_SCALING_SUPPORTED */
2623f0c4f33a4aa0760ba0e12a254b69d996442c9c5hbono@chromium.org#endif /* DCT_ISLOW_SUPPORTED */
2624