1/*
2 * jccoefct.c
3 *
4 * This file was part of the Independent JPEG Group's software:
5 * Copyright (C) 1994-1997, Thomas G. Lane.
6 * It was modified by The libjpeg-turbo Project to include only code and
7 * information relevant to libjpeg-turbo.
8 * For conditions of distribution and use, see the accompanying README file.
9 *
10 * This file contains the coefficient buffer controller for compression.
11 * This controller is the top level of the JPEG compressor proper.
12 * The coefficient buffer lies between forward-DCT and entropy encoding steps.
13 */
14
15#define JPEG_INTERNALS
16#include "jinclude.h"
17#include "jpeglib.h"
18
19
/* A full-image coefficient buffer is required for Huffman optimization and
 * for writing multiple-scan JPEG files.  Either way the DCT runs during the
 * first pass only; later passes merely read back the buffered coefficients.
 * The buffer code is enabled when at least one of those features is built in.
 */
#if defined(ENTROPY_OPT_SUPPORTED) || defined(C_MULTISCAN_FILES_SUPPORTED)
#define FULL_COEF_BUFFER_SUPPORTED
#endif
32
33
34/* Private buffer controller object */
35
36typedef struct {
37  struct jpeg_c_coef_controller pub; /* public fields */
38
39  JDIMENSION iMCU_row_num;      /* iMCU row # within image */
40  JDIMENSION mcu_ctr;           /* counts MCUs processed in current row */
41  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
42  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
43
44  /* For single-pass compression, it's sufficient to buffer just one MCU
45   * (although this may prove a bit slow in practice).  We allocate a
46   * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each
47   * MCU constructed and sent.  In multi-pass modes, this array points to the
48   * current MCU's blocks within the virtual arrays.
49   */
50  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
51
52  /* In multi-pass modes, we need a virtual block array for each component. */
53  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
54} my_coef_controller;
55
56typedef my_coef_controller * my_coef_ptr;
57
58
59/* Forward declarations */
60METHODDEF(boolean) compress_data
61        (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
62#ifdef FULL_COEF_BUFFER_SUPPORTED
63METHODDEF(boolean) compress_first_pass
64        (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
65METHODDEF(boolean) compress_output
66        (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
67#endif
68
69
70LOCAL(void)
71start_iMCU_row (j_compress_ptr cinfo)
72/* Reset within-iMCU-row counters for a new row */
73{
74  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
75
76  /* In an interleaved scan, an MCU row is the same as an iMCU row.
77   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
78   * But at the bottom of the image, process only what's left.
79   */
80  if (cinfo->comps_in_scan > 1) {
81    coef->MCU_rows_per_iMCU_row = 1;
82  } else {
83    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
84      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
85    else
86      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
87  }
88
89  coef->mcu_ctr = 0;
90  coef->MCU_vert_offset = 0;
91}
92
93
94/*
95 * Initialize for a processing pass.
96 */
97
98METHODDEF(void)
99start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
100{
101  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
102
103  coef->iMCU_row_num = 0;
104  start_iMCU_row(cinfo);
105
106  switch (pass_mode) {
107  case JBUF_PASS_THRU:
108    if (coef->whole_image[0] != NULL)
109      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
110    coef->pub.compress_data = compress_data;
111    break;
112#ifdef FULL_COEF_BUFFER_SUPPORTED
113  case JBUF_SAVE_AND_PASS:
114    if (coef->whole_image[0] == NULL)
115      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
116    coef->pub.compress_data = compress_first_pass;
117    break;
118  case JBUF_CRANK_DEST:
119    if (coef->whole_image[0] == NULL)
120      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
121    coef->pub.compress_data = compress_output;
122    break;
123#endif
124  default:
125    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
126    break;
127  }
128}
129
130
131/*
132 * Process some data in the single-pass case.
133 * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
134 * per call, ie, v_samp_factor block rows for each component in the image.
135 * Returns TRUE if the iMCU row is completed, FALSE if suspended.
136 *
137 * NB: input_buf contains a plane for each component in image,
138 * which we index according to the component's SOF position.
139 */
140
141METHODDEF(boolean)
142compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
143{
144  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
145  JDIMENSION MCU_col_num;       /* index of current MCU within row */
146  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
147  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
148  int blkn, bi, ci, yindex, yoffset, blockcnt;
149  JDIMENSION ypos, xpos;
150  jpeg_component_info *compptr;
151
152  /* Loop to write as much as one whole iMCU row */
153  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
154       yoffset++) {
155    for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
156         MCU_col_num++) {
157      /* Determine where data comes from in input_buf and do the DCT thing.
158       * Each call on forward_DCT processes a horizontal row of DCT blocks
159       * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
160       * sequentially.  Dummy blocks at the right or bottom edge are filled in
161       * specially.  The data in them does not matter for image reconstruction,
162       * so we fill them with values that will encode to the smallest amount of
163       * data, viz: all zeroes in the AC entries, DC entries equal to previous
164       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
165       */
166      blkn = 0;
167      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
168        compptr = cinfo->cur_comp_info[ci];
169        blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
170                                                : compptr->last_col_width;
171        xpos = MCU_col_num * compptr->MCU_sample_width;
172        ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
173        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
174          if (coef->iMCU_row_num < last_iMCU_row ||
175              yoffset+yindex < compptr->last_row_height) {
176            (*cinfo->fdct->forward_DCT) (cinfo, compptr,
177                                         input_buf[compptr->component_index],
178                                         coef->MCU_buffer[blkn],
179                                         ypos, xpos, (JDIMENSION) blockcnt);
180            if (blockcnt < compptr->MCU_width) {
181              /* Create some dummy blocks at the right edge of the image. */
182              jzero_far((void *) coef->MCU_buffer[blkn + blockcnt],
183                        (compptr->MCU_width - blockcnt) * sizeof(JBLOCK));
184              for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
185                coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
186              }
187            }
188          } else {
189            /* Create a row of dummy blocks at the bottom of the image. */
190            jzero_far((void *) coef->MCU_buffer[blkn],
191                      compptr->MCU_width * sizeof(JBLOCK));
192            for (bi = 0; bi < compptr->MCU_width; bi++) {
193              coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
194            }
195          }
196          blkn += compptr->MCU_width;
197          ypos += DCTSIZE;
198        }
199      }
200      /* Try to write the MCU.  In event of a suspension failure, we will
201       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
202       */
203      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
204        /* Suspension forced; update state counters and exit */
205        coef->MCU_vert_offset = yoffset;
206        coef->mcu_ctr = MCU_col_num;
207        return FALSE;
208      }
209    }
210    /* Completed an MCU row, but perhaps not an iMCU row */
211    coef->mcu_ctr = 0;
212  }
213  /* Completed the iMCU row, advance counters for next one */
214  coef->iMCU_row_num++;
215  start_iMCU_row(cinfo);
216  return TRUE;
217}
218
219
220#ifdef FULL_COEF_BUFFER_SUPPORTED
221
222/*
223 * Process some data in the first pass of a multi-pass case.
224 * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
225 * per call, ie, v_samp_factor block rows for each component in the image.
226 * This amount of data is read from the source buffer, DCT'd and quantized,
227 * and saved into the virtual arrays.  We also generate suitable dummy blocks
228 * as needed at the right and lower edges.  (The dummy blocks are constructed
229 * in the virtual arrays, which have been padded appropriately.)  This makes
230 * it possible for subsequent passes not to worry about real vs. dummy blocks.
231 *
232 * We must also emit the data to the entropy encoder.  This is conveniently
233 * done by calling compress_output() after we've loaded the current strip
234 * of the virtual arrays.
235 *
236 * NB: input_buf contains a plane for each component in image.  All
237 * components are DCT'd and loaded into the virtual arrays in this pass.
238 * However, it may be that only a subset of the components are emitted to
239 * the entropy encoder during this first pass; be careful about looking
240 * at the scan-dependent variables (MCU dimensions, etc).
241 */
242
243METHODDEF(boolean)
244compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
245{
246  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
247  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
248  JDIMENSION blocks_across, MCUs_across, MCUindex;
249  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
250  JCOEF lastDC;
251  jpeg_component_info *compptr;
252  JBLOCKARRAY buffer;
253  JBLOCKROW thisblockrow, lastblockrow;
254
255  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
256       ci++, compptr++) {
257    /* Align the virtual buffer for this component. */
258    buffer = (*cinfo->mem->access_virt_barray)
259      ((j_common_ptr) cinfo, coef->whole_image[ci],
260       coef->iMCU_row_num * compptr->v_samp_factor,
261       (JDIMENSION) compptr->v_samp_factor, TRUE);
262    /* Count non-dummy DCT block rows in this iMCU row. */
263    if (coef->iMCU_row_num < last_iMCU_row)
264      block_rows = compptr->v_samp_factor;
265    else {
266      /* NB: can't use last_row_height here, since may not be set! */
267      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
268      if (block_rows == 0) block_rows = compptr->v_samp_factor;
269    }
270    blocks_across = compptr->width_in_blocks;
271    h_samp_factor = compptr->h_samp_factor;
272    /* Count number of dummy blocks to be added at the right margin. */
273    ndummy = (int) (blocks_across % h_samp_factor);
274    if (ndummy > 0)
275      ndummy = h_samp_factor - ndummy;
276    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
277     * on forward_DCT processes a complete horizontal row of DCT blocks.
278     */
279    for (block_row = 0; block_row < block_rows; block_row++) {
280      thisblockrow = buffer[block_row];
281      (*cinfo->fdct->forward_DCT) (cinfo, compptr,
282                                   input_buf[ci], thisblockrow,
283                                   (JDIMENSION) (block_row * DCTSIZE),
284                                   (JDIMENSION) 0, blocks_across);
285      if (ndummy > 0) {
286        /* Create dummy blocks at the right edge of the image. */
287        thisblockrow += blocks_across; /* => first dummy block */
288        jzero_far((void *) thisblockrow, ndummy * sizeof(JBLOCK));
289        lastDC = thisblockrow[-1][0];
290        for (bi = 0; bi < ndummy; bi++) {
291          thisblockrow[bi][0] = lastDC;
292        }
293      }
294    }
295    /* If at end of image, create dummy block rows as needed.
296     * The tricky part here is that within each MCU, we want the DC values
297     * of the dummy blocks to match the last real block's DC value.
298     * This squeezes a few more bytes out of the resulting file...
299     */
300    if (coef->iMCU_row_num == last_iMCU_row) {
301      blocks_across += ndummy;  /* include lower right corner */
302      MCUs_across = blocks_across / h_samp_factor;
303      for (block_row = block_rows; block_row < compptr->v_samp_factor;
304           block_row++) {
305        thisblockrow = buffer[block_row];
306        lastblockrow = buffer[block_row-1];
307        jzero_far((void *) thisblockrow,
308                  (size_t) (blocks_across * sizeof(JBLOCK)));
309        for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
310          lastDC = lastblockrow[h_samp_factor-1][0];
311          for (bi = 0; bi < h_samp_factor; bi++) {
312            thisblockrow[bi][0] = lastDC;
313          }
314          thisblockrow += h_samp_factor; /* advance to next MCU in row */
315          lastblockrow += h_samp_factor;
316        }
317      }
318    }
319  }
320  /* NB: compress_output will increment iMCU_row_num if successful.
321   * A suspension return will result in redoing all the work above next time.
322   */
323
324  /* Emit data to the entropy encoder, sharing code with subsequent passes */
325  return compress_output(cinfo, input_buf);
326}
327
328
329/*
330 * Process some data in subsequent passes of a multi-pass case.
331 * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
332 * per call, ie, v_samp_factor block rows for each component in the scan.
333 * The data is obtained from the virtual arrays and fed to the entropy coder.
334 * Returns TRUE if the iMCU row is completed, FALSE if suspended.
335 *
336 * NB: input_buf is ignored; it is likely to be a NULL pointer.
337 */
338
339METHODDEF(boolean)
340compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
341{
342  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
343  JDIMENSION MCU_col_num;       /* index of current MCU within row */
344  int blkn, ci, xindex, yindex, yoffset;
345  JDIMENSION start_col;
346  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
347  JBLOCKROW buffer_ptr;
348  jpeg_component_info *compptr;
349
350  /* Align the virtual buffers for the components used in this scan.
351   * NB: during first pass, this is safe only because the buffers will
352   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
353   */
354  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
355    compptr = cinfo->cur_comp_info[ci];
356    buffer[ci] = (*cinfo->mem->access_virt_barray)
357      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
358       coef->iMCU_row_num * compptr->v_samp_factor,
359       (JDIMENSION) compptr->v_samp_factor, FALSE);
360  }
361
362  /* Loop to process one whole iMCU row */
363  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
364       yoffset++) {
365    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
366         MCU_col_num++) {
367      /* Construct list of pointers to DCT blocks belonging to this MCU */
368      blkn = 0;                 /* index of current DCT block within MCU */
369      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
370        compptr = cinfo->cur_comp_info[ci];
371        start_col = MCU_col_num * compptr->MCU_width;
372        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
373          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
374          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
375            coef->MCU_buffer[blkn++] = buffer_ptr++;
376          }
377        }
378      }
379      /* Try to write the MCU. */
380      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
381        /* Suspension forced; update state counters and exit */
382        coef->MCU_vert_offset = yoffset;
383        coef->mcu_ctr = MCU_col_num;
384        return FALSE;
385      }
386    }
387    /* Completed an MCU row, but perhaps not an iMCU row */
388    coef->mcu_ctr = 0;
389  }
390  /* Completed the iMCU row, advance counters for next one */
391  coef->iMCU_row_num++;
392  start_iMCU_row(cinfo);
393  return TRUE;
394}
395
396#endif /* FULL_COEF_BUFFER_SUPPORTED */
397
398
399/*
400 * Initialize coefficient buffer controller.
401 */
402
403GLOBAL(void)
404jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
405{
406  my_coef_ptr coef;
407
408  coef = (my_coef_ptr)
409    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
410                                sizeof(my_coef_controller));
411  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
412  coef->pub.start_pass = start_pass_coef;
413
414  /* Create the coefficient buffer. */
415  if (need_full_buffer) {
416#ifdef FULL_COEF_BUFFER_SUPPORTED
417    /* Allocate a full-image virtual array for each component, */
418    /* padded to a multiple of samp_factor DCT blocks in each direction. */
419    int ci;
420    jpeg_component_info *compptr;
421
422    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
423         ci++, compptr++) {
424      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
425        ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
426         (JDIMENSION) jround_up((long) compptr->width_in_blocks,
427                                (long) compptr->h_samp_factor),
428         (JDIMENSION) jround_up((long) compptr->height_in_blocks,
429                                (long) compptr->v_samp_factor),
430         (JDIMENSION) compptr->v_samp_factor);
431    }
432#else
433    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
434#endif
435  } else {
436    /* We only need a single-MCU buffer. */
437    JBLOCKROW buffer;
438    int i;
439
440    buffer = (JBLOCKROW)
441      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
442                                  C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
443    for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
444      coef->MCU_buffer[i] = buffer + i;
445    }
446    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
447  }
448}
449