1/*
2 * jcdctmgr.c
3 *
4 * Copyright (C) 1994-1996, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
7 *
8 * This file contains the forward-DCT management logic.
9 * This code selects a particular DCT implementation to be used,
10 * and it performs related housekeeping chores including coefficient
11 * quantization.
12 */
13
14#define JPEG_INTERNALS
15#include "jinclude.h"
16#include "jpeglib.h"
17#include "jdct.h"		/* Private declarations for DCT subsystem */
18
19
20/* Private subobject for this module */
21
22typedef struct {
23  struct jpeg_forward_dct pub;	/* public fields */
24
25  /* Pointer to the DCT routine actually in use */
26  forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
27
28  /* The actual post-DCT divisors --- not identical to the quant table
29   * entries, because of scaling (especially for an unnormalized DCT).
30   * Each table is given in normal array order.
31   */
32  DCTELEM * divisors[NUM_QUANT_TBLS];
33
34#ifdef DCT_FLOAT_SUPPORTED
35  /* Same as above for the floating-point case. */
36  float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
37  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
38#endif
39} my_fdct_controller;
40
41typedef my_fdct_controller * my_fdct_ptr;
42
43
44/* The current scaled-DCT routines require ISLOW-style divisor tables,
45 * so be sure to compile that code if either ISLOW or SCALING is requested.
46 */
47#ifdef DCT_ISLOW_SUPPORTED
48#define PROVIDE_ISLOW_TABLES
49#else
50#ifdef DCT_SCALING_SUPPORTED
51#define PROVIDE_ISLOW_TABLES
52#endif
53#endif
54
55
56/*
57 * Perform forward DCT on one or more blocks of a component.
58 *
59 * The input samples are taken from the sample_data[] array starting at
60 * position start_row/start_col, and moving to the right for any additional
61 * blocks. The quantized coefficients are returned in coef_blocks[].
62 */
63
64METHODDEF(void)
65forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
66             JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
67             JDIMENSION start_row, JDIMENSION start_col,
68             JDIMENSION num_blocks)
69/* This version is used for integer DCT implementations. */
70{
71  /* This routine is heavily used, so it's worth coding it tightly. */
72  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
73  forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
74  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
75  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
76  JDIMENSION bi;
77
78  sample_data += start_row;	/* fold in the vertical offset once */
79
80  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
81    /* Perform the DCT */
82    (*do_dct) (workspace, sample_data, start_col);
83
84    /* Quantize/descale the coefficients, and store into coef_blocks[] */
85    { register DCTELEM temp, qval;
86      register int i;
87      register JCOEFPTR output_ptr = coef_blocks[bi];
88
89      for (i = 0; i < DCTSIZE2; i++) {
90        qval = divisors[i];
91        temp = workspace[i];
92        /* Divide the coefficient value by qval, ensuring proper rounding.
93         * Since C does not specify the direction of rounding for negative
94         * quotients, we have to force the dividend positive for portability.
95         *
96         * In most files, at least half of the output values will be zero
97         * (at default quantization settings, more like three-quarters...)
98         * so we should ensure that this case is fast.  On many machines,
99         * a comparison is enough cheaper than a divide to make a special test
100         * a win.  Since both inputs will be nonnegative, we need only test
101         * for a < b to discover whether a/b is 0.
102         * If your machine's division is fast enough, define FAST_DIVIDE.
103         */
104#ifdef FAST_DIVIDE
105#define DIVIDE_BY(a,b)	a /= b
106#else
107#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
108#endif
109        if (temp < 0) {
110          temp = -temp;
111          temp += qval>>1;	/* for rounding */
112          DIVIDE_BY(temp, qval);
113          temp = -temp;
114        } else {
115          temp += qval>>1;	/* for rounding */
116          DIVIDE_BY(temp, qval);
117        }
118        output_ptr[i] = (JCOEF) temp;
119      }
120    }
121  }
122}
123
124
125#ifdef DCT_FLOAT_SUPPORTED
126
127METHODDEF(void)
128forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
129                   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
130                   JDIMENSION start_row, JDIMENSION start_col,
131                   JDIMENSION num_blocks)
132/* This version is used for floating-point DCT implementations. */
133{
134  /* This routine is heavily used, so it's worth coding it tightly. */
135  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
136  float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
137  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
138  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
139  JDIMENSION bi;
140
141  sample_data += start_row;	/* fold in the vertical offset once */
142
143  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
144    /* Perform the DCT */
145    (*do_dct) (workspace, sample_data, start_col);
146
147    /* Quantize/descale the coefficients, and store into coef_blocks[] */
148    { register FAST_FLOAT temp;
149      register int i;
150      register JCOEFPTR output_ptr = coef_blocks[bi];
151
152      for (i = 0; i < DCTSIZE2; i++) {
153        /* Apply the quantization and scaling factor */
154        temp = workspace[i] * divisors[i];
155        /* Round to nearest integer.
156         * Since C does not specify the direction of rounding for negative
157         * quotients, we have to force the dividend positive for portability.
158         * The maximum coefficient size is +-16K (for 12-bit data), so this
159         * code should work for either 16-bit or 32-bit ints.
160         */
161        output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
162      }
163    }
164  }
165}
166
167#endif /* DCT_FLOAT_SUPPORTED */
168
169
170/*
171 * Initialize for a processing pass.
172 * Verify that all referenced Q-tables are present, and set up
173 * the divisor table for each one.
174 * In the current implementation, DCT of all components is done during
175 * the first pass, even if only some components will be output in the
176 * first scan.  Hence all components should be examined here.
177 */
178
179METHODDEF(void)
180start_pass_fdctmgr (j_compress_ptr cinfo)
181{
182  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
183  int ci, qtblno, i;
184  jpeg_component_info *compptr;
185  int method = 0;
186  JQUANT_TBL * qtbl;
187  DCTELEM * dtbl;
188
189  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
190       ci++, compptr++) {
191    /* Select the proper DCT routine for this component's scaling */
192    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
193#ifdef DCT_SCALING_SUPPORTED
194    case ((1 << 8) + 1):
195      fdct->do_dct[ci] = jpeg_fdct_1x1;
196      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
197      break;
198    case ((2 << 8) + 2):
199      fdct->do_dct[ci] = jpeg_fdct_2x2;
200      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
201      break;
202    case ((3 << 8) + 3):
203      fdct->do_dct[ci] = jpeg_fdct_3x3;
204      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
205      break;
206    case ((4 << 8) + 4):
207      fdct->do_dct[ci] = jpeg_fdct_4x4;
208      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
209      break;
210    case ((5 << 8) + 5):
211      fdct->do_dct[ci] = jpeg_fdct_5x5;
212      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
213      break;
214    case ((6 << 8) + 6):
215      fdct->do_dct[ci] = jpeg_fdct_6x6;
216      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
217      break;
218    case ((7 << 8) + 7):
219      fdct->do_dct[ci] = jpeg_fdct_7x7;
220      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
221      break;
222    case ((9 << 8) + 9):
223      fdct->do_dct[ci] = jpeg_fdct_9x9;
224      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
225      break;
226    case ((10 << 8) + 10):
227      fdct->do_dct[ci] = jpeg_fdct_10x10;
228      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
229      break;
230    case ((11 << 8) + 11):
231      fdct->do_dct[ci] = jpeg_fdct_11x11;
232      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
233      break;
234    case ((12 << 8) + 12):
235      fdct->do_dct[ci] = jpeg_fdct_12x12;
236      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
237      break;
238    case ((13 << 8) + 13):
239      fdct->do_dct[ci] = jpeg_fdct_13x13;
240      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
241      break;
242    case ((14 << 8) + 14):
243      fdct->do_dct[ci] = jpeg_fdct_14x14;
244      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
245      break;
246    case ((15 << 8) + 15):
247      fdct->do_dct[ci] = jpeg_fdct_15x15;
248      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
249      break;
250    case ((16 << 8) + 16):
251      fdct->do_dct[ci] = jpeg_fdct_16x16;
252      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
253      break;
254    case ((16 << 8) + 8):
255      fdct->do_dct[ci] = jpeg_fdct_16x8;
256      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
257      break;
258    case ((14 << 8) + 7):
259      fdct->do_dct[ci] = jpeg_fdct_14x7;
260      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
261      break;
262    case ((12 << 8) + 6):
263      fdct->do_dct[ci] = jpeg_fdct_12x6;
264      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
265      break;
266    case ((10 << 8) + 5):
267      fdct->do_dct[ci] = jpeg_fdct_10x5;
268      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
269      break;
270    case ((8 << 8) + 4):
271      fdct->do_dct[ci] = jpeg_fdct_8x4;
272      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
273      break;
274    case ((6 << 8) + 3):
275      fdct->do_dct[ci] = jpeg_fdct_6x3;
276      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
277      break;
278    case ((4 << 8) + 2):
279      fdct->do_dct[ci] = jpeg_fdct_4x2;
280      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
281      break;
282    case ((2 << 8) + 1):
283      fdct->do_dct[ci] = jpeg_fdct_2x1;
284      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
285      break;
286    case ((8 << 8) + 16):
287      fdct->do_dct[ci] = jpeg_fdct_8x16;
288      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
289      break;
290    case ((7 << 8) + 14):
291      fdct->do_dct[ci] = jpeg_fdct_7x14;
292      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
293      break;
294    case ((6 << 8) + 12):
295      fdct->do_dct[ci] = jpeg_fdct_6x12;
296      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
297      break;
298    case ((5 << 8) + 10):
299      fdct->do_dct[ci] = jpeg_fdct_5x10;
300      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
301      break;
302    case ((4 << 8) + 8):
303      fdct->do_dct[ci] = jpeg_fdct_4x8;
304      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
305      break;
306    case ((3 << 8) + 6):
307      fdct->do_dct[ci] = jpeg_fdct_3x6;
308      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
309      break;
310    case ((2 << 8) + 4):
311      fdct->do_dct[ci] = jpeg_fdct_2x4;
312      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
313      break;
314    case ((1 << 8) + 2):
315      fdct->do_dct[ci] = jpeg_fdct_1x2;
316      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
317      break;
318#endif
319    case ((DCTSIZE << 8) + DCTSIZE):
320      switch (cinfo->dct_method) {
321#ifdef DCT_ISLOW_SUPPORTED
322      case JDCT_ISLOW:
323        fdct->do_dct[ci] = jpeg_fdct_islow;
324        method = JDCT_ISLOW;
325        break;
326#endif
327#ifdef DCT_IFAST_SUPPORTED
328      case JDCT_IFAST:
329        fdct->do_dct[ci] = jpeg_fdct_ifast;
330        method = JDCT_IFAST;
331        break;
332#endif
333#ifdef DCT_FLOAT_SUPPORTED
334      case JDCT_FLOAT:
335        fdct->do_float_dct[ci] = jpeg_fdct_float;
336        method = JDCT_FLOAT;
337        break;
338#endif
339      default:
340        ERREXIT(cinfo, JERR_NOT_COMPILED);
341        break;
342      }
343      break;
344    default:
345      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
346               compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
347      break;
348    }
349    qtblno = compptr->quant_tbl_no;
350    /* Make sure specified quantization table is present */
351    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
352        cinfo->quant_tbl_ptrs[qtblno] == NULL)
353      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
354    qtbl = cinfo->quant_tbl_ptrs[qtblno];
355    /* Compute divisors for this quant table */
356    /* We may do this more than once for same table, but it's not a big deal */
357    switch (method) {
358#ifdef PROVIDE_ISLOW_TABLES
359    case JDCT_ISLOW:
360      /* For LL&M IDCT method, divisors are equal to raw quantization
361       * coefficients multiplied by 8 (to counteract scaling).
362       */
363      if (fdct->divisors[qtblno] == NULL) {
364        fdct->divisors[qtblno] = (DCTELEM *)
365          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
366                                      DCTSIZE2 * SIZEOF(DCTELEM));
367      }
368      dtbl = fdct->divisors[qtblno];
369      for (i = 0; i < DCTSIZE2; i++) {
370        dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
371      }
372      fdct->pub.forward_DCT[ci] = forward_DCT;
373      break;
374#endif
375#ifdef DCT_IFAST_SUPPORTED
376    case JDCT_IFAST:
377      {
378        /* For AA&N IDCT method, divisors are equal to quantization
379         * coefficients scaled by scalefactor[row]*scalefactor[col], where
380         *   scalefactor[0] = 1
381         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
382         * We apply a further scale factor of 8.
383         */
384#define CONST_BITS 14
385        static const INT16 aanscales[DCTSIZE2] = {
386          /* precomputed values scaled up by 14 bits */
387          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
388          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
389          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
390          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
391          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
392          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
393           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
394           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
395        };
396        SHIFT_TEMPS
397
398        if (fdct->divisors[qtblno] == NULL) {
399          fdct->divisors[qtblno] = (DCTELEM *)
400            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
401                                        DCTSIZE2 * SIZEOF(DCTELEM));
402        }
403        dtbl = fdct->divisors[qtblno];
404        for (i = 0; i < DCTSIZE2; i++) {
405          dtbl[i] = (DCTELEM)
406            DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
407                                  (INT32) aanscales[i]),
408                    CONST_BITS-3);
409        }
410      }
411      fdct->pub.forward_DCT[ci] = forward_DCT;
412      break;
413#endif
414#ifdef DCT_FLOAT_SUPPORTED
415    case JDCT_FLOAT:
416      {
417        /* For float AA&N IDCT method, divisors are equal to quantization
418         * coefficients scaled by scalefactor[row]*scalefactor[col], where
419         *   scalefactor[0] = 1
420         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
421         * We apply a further scale factor of 8.
422         * What's actually stored is 1/divisor so that the inner loop can
423         * use a multiplication rather than a division.
424         */
425        FAST_FLOAT * fdtbl;
426        int row, col;
427        static const double aanscalefactor[DCTSIZE] = {
428          1.0, 1.387039845, 1.306562965, 1.175875602,
429          1.0, 0.785694958, 0.541196100, 0.275899379
430        };
431
432        if (fdct->float_divisors[qtblno] == NULL) {
433          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
434            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
435                                        DCTSIZE2 * SIZEOF(FAST_FLOAT));
436        }
437        fdtbl = fdct->float_divisors[qtblno];
438        i = 0;
439        for (row = 0; row < DCTSIZE; row++) {
440          for (col = 0; col < DCTSIZE; col++) {
441            fdtbl[i] = (FAST_FLOAT)
442              (1.0 / (((double) qtbl->quantval[i] *
443                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
444            i++;
445          }
446        }
447      }
448      fdct->pub.forward_DCT[ci] = forward_DCT_float;
449      break;
450#endif
451    default:
452      ERREXIT(cinfo, JERR_NOT_COMPILED);
453      break;
454    }
455  }
456}
457
458
459/*
460 * Initialize FDCT manager.
461 */
462
463GLOBAL(void)
464jinit_forward_dct (j_compress_ptr cinfo)
465{
466  my_fdct_ptr fdct;
467  int i;
468
469  fdct = (my_fdct_ptr)
470    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
471                                SIZEOF(my_fdct_controller));
472  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
473  fdct->pub.start_pass = start_pass_fdctmgr;
474
475  /* Mark divisor tables unallocated */
476  for (i = 0; i < NUM_QUANT_TBLS; i++) {
477    fdct->divisors[i] = NULL;
478#ifdef DCT_FLOAT_SUPPORTED
479    fdct->float_divisors[i] = NULL;
480#endif
481  }
482}
483