1/*
2 * jcsample.c
3 *
4 * This file was part of the Independent JPEG Group's software:
5 * Copyright (C) 1991-1996, Thomas G. Lane.
6 * libjpeg-turbo Modifications:
7 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
8 * Copyright (C) 2014, MIPS Technologies, Inc., California
9 * For conditions of distribution and use, see the accompanying README file.
10 *
11 * This file contains downsampling routines.
12 *
13 * Downsampling input data is counted in "row groups".  A row group
14 * is defined to be max_v_samp_factor pixel rows of each component,
15 * from which the downsampler produces v_samp_factor sample rows.
16 * A single row group is processed in each call to the downsampler module.
17 *
18 * The downsampler is responsible for edge-expansion of its output data
19 * to fill an integral number of DCT blocks horizontally.  The source buffer
20 * may be modified if it is helpful for this purpose (the source buffer is
21 * allocated wide enough to correspond to the desired output width).
22 * The caller (the prep controller) is responsible for vertical padding.
23 *
24 * The downsampler may request "context rows" by setting need_context_rows
25 * during startup.  In this case, the input arrays will contain at least
26 * one row group's worth of pixels above and below the passed-in data;
27 * the caller will create dummy rows at image top and bottom by replicating
28 * the first or last real pixel row.
29 *
30 * An excellent reference for image resampling is
31 *   Digital Image Warping, George Wolberg, 1990.
32 *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
33 *
34 * The downsampling algorithm used here is a simple average of the source
35 * pixels covered by the output pixel.  The hi-falutin sampling literature
36 * refers to this as a "box filter".  In general the characteristics of a box
37 * filter are not very good, but for the specific cases we normally use (1:1
38 * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
39 * nearly so bad.  If you intend to use other sampling ratios, you'd be well
40 * advised to improve this code.
41 *
42 * A simple input-smoothing capability is provided.  This is mainly intended
43 * for cleaning up color-dithered GIF input files (if you find it inadequate,
44 * we suggest using an external filtering program such as pnmconvol).  When
45 * enabled, each input pixel P is replaced by a weighted sum of itself and its
46 * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
47 * where SF = (smoothing_factor / 1024).
 * Currently, smoothing is only implemented for full-size (1:1) and 2h2v
 * components; components with other sampling ratios are not smoothed.
49 */
50
51#define JPEG_INTERNALS
52#include "jinclude.h"
53#include "jpeglib.h"
54#include "jsimd.h"
55
56
/* Pointer to routine to downsample a single component.
 * Each per-component routine receives the compressor object, the info
 * record of the component being processed, and the input and output row
 * arrays for that component.
 */
typedef void (*downsample1_ptr) (j_compress_ptr cinfo,
                                 jpeg_component_info * compptr,
                                 JSAMPARRAY input_data,
                                 JSAMPARRAY output_data);

/* Private subobject: the public downsampler interface followed by this
 * module's own per-component dispatch table.
 */

typedef struct {
  struct jpeg_downsampler pub;  /* public fields */

  /* Downsampling method pointers, one per component */
  downsample1_ptr methods[MAX_COMPONENTS];
} my_downsampler;

/* Convenience pointer type used when casting cinfo->downsample */
typedef my_downsampler * my_downsample_ptr;
73
74
75/*
76 * Initialize for a downsampling pass.
77 */
78
79METHODDEF(void)
80start_pass_downsample (j_compress_ptr cinfo)
81{
82  /* no work for now */
83}
84
85
86/*
87 * Expand a component horizontally from width input_cols to width output_cols,
88 * by duplicating the rightmost samples.
89 */
90
91LOCAL(void)
92expand_right_edge (JSAMPARRAY image_data, int num_rows,
93                   JDIMENSION input_cols, JDIMENSION output_cols)
94{
95  register JSAMPROW ptr;
96  register JSAMPLE pixval;
97  register int count;
98  int row;
99  int numcols = (int) (output_cols - input_cols);
100
101  if (numcols > 0) {
102    for (row = 0; row < num_rows; row++) {
103      ptr = image_data[row] + input_cols;
104      pixval = ptr[-1];         /* don't need GETJSAMPLE() here */
105      for (count = numcols; count > 0; count--)
106        *ptr++ = pixval;
107    }
108  }
109}
110
111
112/*
113 * Do downsampling for a whole row group (all components).
114 *
115 * In this version we simply downsample each component independently.
116 */
117
118METHODDEF(void)
119sep_downsample (j_compress_ptr cinfo,
120                JSAMPIMAGE input_buf, JDIMENSION in_row_index,
121                JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
122{
123  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
124  int ci;
125  jpeg_component_info * compptr;
126  JSAMPARRAY in_ptr, out_ptr;
127
128  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
129       ci++, compptr++) {
130    in_ptr = input_buf[ci] + in_row_index;
131    out_ptr = output_buf[ci] + (out_row_group_index * compptr->v_samp_factor);
132    (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
133  }
134}
135
136
137/*
138 * Downsample pixel values of a single component.
139 * One row group is processed per call.
140 * This version handles arbitrary integral sampling ratios, without smoothing.
141 * Note that this version is not actually used for customary sampling ratios.
142 */
143
144METHODDEF(void)
145int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
146                JSAMPARRAY input_data, JSAMPARRAY output_data)
147{
148  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
149  JDIMENSION outcol, outcol_h;  /* outcol_h == outcol*h_expand */
150  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
151  JSAMPROW inptr, outptr;
152  INT32 outvalue;
153
154  h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
155  v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor;
156  numpix = h_expand * v_expand;
157  numpix2 = numpix/2;
158
159  /* Expand input data enough to let all the output samples be generated
160   * by the standard loop.  Special-casing padded output would be more
161   * efficient.
162   */
163  expand_right_edge(input_data, cinfo->max_v_samp_factor,
164                    cinfo->image_width, output_cols * h_expand);
165
166  inrow = 0;
167  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
168    outptr = output_data[outrow];
169    for (outcol = 0, outcol_h = 0; outcol < output_cols;
170         outcol++, outcol_h += h_expand) {
171      outvalue = 0;
172      for (v = 0; v < v_expand; v++) {
173        inptr = input_data[inrow+v] + outcol_h;
174        for (h = 0; h < h_expand; h++) {
175          outvalue += (INT32) GETJSAMPLE(*inptr++);
176        }
177      }
178      *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
179    }
180    inrow += v_expand;
181  }
182}
183
184
185/*
186 * Downsample pixel values of a single component.
187 * This version handles the special case of a full-size component,
188 * without smoothing.
189 */
190
191METHODDEF(void)
192fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
193                     JSAMPARRAY input_data, JSAMPARRAY output_data)
194{
195  /* Copy the data */
196  jcopy_sample_rows(input_data, 0, output_data, 0,
197                    cinfo->max_v_samp_factor, cinfo->image_width);
198  /* Edge-expand */
199  expand_right_edge(output_data, cinfo->max_v_samp_factor,
200                    cinfo->image_width, compptr->width_in_blocks * DCTSIZE);
201}
202
203
204/*
205 * Downsample pixel values of a single component.
206 * This version handles the common case of 2:1 horizontal and 1:1 vertical,
207 * without smoothing.
208 *
209 * A note about the "bias" calculations: when rounding fractional values to
210 * integer, we do not want to always round 0.5 up to the next integer.
211 * If we did that, we'd introduce a noticeable bias towards larger values.
212 * Instead, this code is arranged so that 0.5 will be rounded up or down at
213 * alternate pixel locations (a simple ordered dither pattern).
214 */
215
216METHODDEF(void)
217h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
218                 JSAMPARRAY input_data, JSAMPARRAY output_data)
219{
220  int outrow;
221  JDIMENSION outcol;
222  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
223  register JSAMPROW inptr, outptr;
224  register int bias;
225
226  /* Expand input data enough to let all the output samples be generated
227   * by the standard loop.  Special-casing padded output would be more
228   * efficient.
229   */
230  expand_right_edge(input_data, cinfo->max_v_samp_factor,
231                    cinfo->image_width, output_cols * 2);
232
233  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
234    outptr = output_data[outrow];
235    inptr = input_data[outrow];
236    bias = 0;                   /* bias = 0,1,0,1,... for successive samples */
237    for (outcol = 0; outcol < output_cols; outcol++) {
238      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
239                              + bias) >> 1);
240      bias ^= 1;                /* 0=>1, 1=>0 */
241      inptr += 2;
242    }
243  }
244}
245
246
247/*
248 * Downsample pixel values of a single component.
249 * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
250 * without smoothing.
251 */
252
253METHODDEF(void)
254h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
255                 JSAMPARRAY input_data, JSAMPARRAY output_data)
256{
257  int inrow, outrow;
258  JDIMENSION outcol;
259  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
260  register JSAMPROW inptr0, inptr1, outptr;
261  register int bias;
262
263  /* Expand input data enough to let all the output samples be generated
264   * by the standard loop.  Special-casing padded output would be more
265   * efficient.
266   */
267  expand_right_edge(input_data, cinfo->max_v_samp_factor,
268                    cinfo->image_width, output_cols * 2);
269
270  inrow = 0;
271  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
272    outptr = output_data[outrow];
273    inptr0 = input_data[inrow];
274    inptr1 = input_data[inrow+1];
275    bias = 1;                   /* bias = 1,2,1,2,... for successive samples */
276    for (outcol = 0; outcol < output_cols; outcol++) {
277      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
278                              GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
279                              + bias) >> 2);
280      bias ^= 3;                /* 1=>2, 2=>1 */
281      inptr0 += 2; inptr1 += 2;
282    }
283    inrow += 2;
284  }
285}
286
287
288#ifdef INPUT_SMOOTHING_SUPPORTED
289
290/*
291 * Downsample pixel values of a single component.
292 * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
293 * with smoothing.  One row of context is required.
294 */
295
296METHODDEF(void)
297h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
298                        JSAMPARRAY input_data, JSAMPARRAY output_data)
299{
300  int inrow, outrow;
301  JDIMENSION colctr;
302  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
303  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
304  INT32 membersum, neighsum, memberscale, neighscale;
305
306  /* Expand input data enough to let all the output samples be generated
307   * by the standard loop.  Special-casing padded output would be more
308   * efficient.
309   */
310  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
311                    cinfo->image_width, output_cols * 2);
312
313  /* We don't bother to form the individual "smoothed" input pixel values;
314   * we can directly compute the output which is the average of the four
315   * smoothed values.  Each of the four member pixels contributes a fraction
316   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
317   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
318   * output.  The four corner-adjacent neighbor pixels contribute a fraction
319   * SF to just one smoothed pixel, or SF/4 to the final output; while the
320   * eight edge-adjacent neighbors contribute SF to each of two smoothed
321   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
322   * factors are scaled by 2^16 = 65536.
323   * Also recall that SF = smoothing_factor / 1024.
324   */
325
326  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
327  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
328
329  inrow = 0;
330  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
331    outptr = output_data[outrow];
332    inptr0 = input_data[inrow];
333    inptr1 = input_data[inrow+1];
334    above_ptr = input_data[inrow-1];
335    below_ptr = input_data[inrow+2];
336
337    /* Special case for first column: pretend column -1 is same as column 0 */
338    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
339                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
340    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
341               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
342               GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
343               GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
344    neighsum += neighsum;
345    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
346                GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
347    membersum = membersum * memberscale + neighsum * neighscale;
348    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
349    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
350
351    for (colctr = output_cols - 2; colctr > 0; colctr--) {
352      /* sum of pixels directly mapped to this output element */
353      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
354                  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
355      /* sum of edge-neighbor pixels */
356      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
357                 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
358                 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
359                 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
360      /* The edge-neighbors count twice as much as corner-neighbors */
361      neighsum += neighsum;
362      /* Add in the corner-neighbors */
363      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
364                  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
365      /* form final output scaled up by 2^16 */
366      membersum = membersum * memberscale + neighsum * neighscale;
367      /* round, descale and output it */
368      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
369      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
370    }
371
372    /* Special case for last column */
373    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
374                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
375    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
376               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
377               GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
378               GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
379    neighsum += neighsum;
380    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
381                GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
382    membersum = membersum * memberscale + neighsum * neighscale;
383    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
384
385    inrow += 2;
386  }
387}
388
389
390/*
391 * Downsample pixel values of a single component.
392 * This version handles the special case of a full-size component,
393 * with smoothing.  One row of context is required.
394 */
395
396METHODDEF(void)
397fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
398                            JSAMPARRAY input_data, JSAMPARRAY output_data)
399{
400  int outrow;
401  JDIMENSION colctr;
402  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
403  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
404  INT32 membersum, neighsum, memberscale, neighscale;
405  int colsum, lastcolsum, nextcolsum;
406
407  /* Expand input data enough to let all the output samples be generated
408   * by the standard loop.  Special-casing padded output would be more
409   * efficient.
410   */
411  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
412                    cinfo->image_width, output_cols);
413
414  /* Each of the eight neighbor pixels contributes a fraction SF to the
415   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
416   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
417   * Also recall that SF = smoothing_factor / 1024.
418   */
419
420  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
421  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
422
423  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
424    outptr = output_data[outrow];
425    inptr = input_data[outrow];
426    above_ptr = input_data[outrow-1];
427    below_ptr = input_data[outrow+1];
428
429    /* Special case for first column */
430    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
431             GETJSAMPLE(*inptr);
432    membersum = GETJSAMPLE(*inptr++);
433    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
434                 GETJSAMPLE(*inptr);
435    neighsum = colsum + (colsum - membersum) + nextcolsum;
436    membersum = membersum * memberscale + neighsum * neighscale;
437    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
438    lastcolsum = colsum; colsum = nextcolsum;
439
440    for (colctr = output_cols - 2; colctr > 0; colctr--) {
441      membersum = GETJSAMPLE(*inptr++);
442      above_ptr++; below_ptr++;
443      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
444                   GETJSAMPLE(*inptr);
445      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
446      membersum = membersum * memberscale + neighsum * neighscale;
447      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
448      lastcolsum = colsum; colsum = nextcolsum;
449    }
450
451    /* Special case for last column */
452    membersum = GETJSAMPLE(*inptr);
453    neighsum = lastcolsum + (colsum - membersum) + colsum;
454    membersum = membersum * memberscale + neighsum * neighscale;
455    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
456
457  }
458}
459
460#endif /* INPUT_SMOOTHING_SUPPORTED */
461
462
463/*
464 * Module initialization routine for downsampling.
465 * Note that we must select a routine for each component.
466 */
467
468GLOBAL(void)
469jinit_downsampler (j_compress_ptr cinfo)
470{
471  my_downsample_ptr downsample;
472  int ci;
473  jpeg_component_info * compptr;
474  boolean smoothok = TRUE;
475
476  downsample = (my_downsample_ptr)
477    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
478                                sizeof(my_downsampler));
479  cinfo->downsample = (struct jpeg_downsampler *) downsample;
480  downsample->pub.start_pass = start_pass_downsample;
481  downsample->pub.downsample = sep_downsample;
482  downsample->pub.need_context_rows = FALSE;
483
484  if (cinfo->CCIR601_sampling)
485    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
486
487  /* Verify we can handle the sampling factors, and set up method pointers */
488  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
489       ci++, compptr++) {
490    if (compptr->h_samp_factor == cinfo->max_h_samp_factor &&
491        compptr->v_samp_factor == cinfo->max_v_samp_factor) {
492#ifdef INPUT_SMOOTHING_SUPPORTED
493      if (cinfo->smoothing_factor) {
494        downsample->methods[ci] = fullsize_smooth_downsample;
495        downsample->pub.need_context_rows = TRUE;
496      } else
497#endif
498        downsample->methods[ci] = fullsize_downsample;
499    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
500               compptr->v_samp_factor == cinfo->max_v_samp_factor) {
501      smoothok = FALSE;
502      if (jsimd_can_h2v1_downsample())
503        downsample->methods[ci] = jsimd_h2v1_downsample;
504      else
505        downsample->methods[ci] = h2v1_downsample;
506    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
507               compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
508#ifdef INPUT_SMOOTHING_SUPPORTED
509      if (cinfo->smoothing_factor) {
510#if defined(__mips__)
511        if (jsimd_can_h2v2_smooth_downsample())
512          downsample->methods[ci] = jsimd_h2v2_smooth_downsample;
513        else
514#endif
515          downsample->methods[ci] = h2v2_smooth_downsample;
516        downsample->pub.need_context_rows = TRUE;
517      } else
518#endif
519      {
520        if (jsimd_can_h2v2_downsample())
521          downsample->methods[ci] = jsimd_h2v2_downsample;
522        else
523          downsample->methods[ci] = h2v2_downsample;
524      }
525    } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
526               (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
527      smoothok = FALSE;
528      downsample->methods[ci] = int_downsample;
529    } else
530      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
531  }
532
533#ifdef INPUT_SMOOTHING_SUPPORTED
534  if (cinfo->smoothing_factor && !smoothok)
535    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
536#endif
537}
538