1/*
2 * jddctmgr.c
3 *
4 * Copyright (C) 1994-1996, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
7 *
8 * This file contains the inverse-DCT management logic.
9 * This code selects a particular IDCT implementation to be used,
10 * and it performs related housekeeping chores.  No code in this file
11 * is executed per IDCT step, only during output pass setup.
12 *
13 * Note that the IDCT routines are responsible for performing coefficient
14 * dequantization as well as the IDCT proper.  This module sets up the
15 * dequantization multiplier table needed by the IDCT routine.
16 */
17
18#define JPEG_INTERNALS
19#include "jinclude.h"
20#include "jpeglib.h"
21#include "jdct.h"		/* Private declarations for DCT subsystem */
22
/* Re-validate a build-system request for the ARMv6 assembly IDCT.
 * The macro is first undefined and then defined again only when compiling
 * for ARM (__arm__) with __ARM_ARCH__ >= 6.  On older ARM targets a warning
 * is emitted; on non-ARM targets the request is dropped silently.
 * NOTE(review): __ARM_ARCH__ is presumably supplied by
 * <machine/cpu-features.h> -- confirm against the platform headers.
 */
#ifdef ANDROID_ARMV6_IDCT
  #undef ANDROID_ARMV6_IDCT
  #ifdef __arm__
    #include <machine/cpu-features.h>
    #if __ARM_ARCH__ >= 6
      #define ANDROID_ARMV6_IDCT
    #else
      #warning "ANDROID_ARMV6_IDCT is disabled"
    #endif
  #endif
#endif
34
35#ifdef NV_ARM_NEON
36#include "jsimd_neon.h"
37#endif
38
39#ifdef ANDROID_ARMV6_IDCT
40
41/* Intentionally declare the prototype with arguments of primitive types instead
42 * of type-defined ones. This will at least generate some warnings if jmorecfg.h
43 * is changed and becomes incompatible with the assembly code.
44 */
45extern void armv6_idct(short *coefs, int *quans, unsigned char **rows, int col);
46
47void jpeg_idct_armv6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
48		 JCOEFPTR coef_block,
49		 JSAMPARRAY output_buf, JDIMENSION output_col)
50{
51  IFAST_MULT_TYPE *dct_table = (IFAST_MULT_TYPE *)compptr->dct_table;
52  armv6_idct(coef_block, dct_table, output_buf, output_col);
53}
54
55#endif
56
#ifdef ANDROID_INTELSSE2_IDCT
/* 16-byte-aligned array of DCTSIZE2 shorts, defined outside this file.
 * It is not referenced by the code visible in this file.
 * NOTE(review): presumably the quantization table consumed by the SSE2
 * IDCT -- confirm against its definition.
 */
extern short __attribute__((aligned(16))) quantptrSSE[DCTSIZE2];
/* IDCT routine defined outside this file; start_pass selects it for
 * full-size blocks (both JDCT_ISLOW and JDCT_IFAST requests) when
 * ANDROID_INTELSSE2_IDCT is defined and ANDROID_ARMV6_IDCT is not.
 */
extern void jpeg_idct_intelsse (j_decompress_ptr cinfo, jpeg_component_info * compptr,
		JCOEFPTR coef_block,
		JSAMPARRAY output_buf, JDIMENSION output_col);
#endif

#ifdef ANDROID_MIPS_IDCT
/* IDCT routine defined outside this file; start_pass selects it for
 * full-size blocks (both JDCT_ISLOW and JDCT_IFAST requests) when neither
 * the ARMv6 nor the SSE2 variant is enabled.
 */
extern void jpeg_idct_mips(j_decompress_ptr, jpeg_component_info *, JCOEFPTR, JSAMPARRAY, JDIMENSION);
#endif
67
68/*
69 * The decompressor input side (jdinput.c) saves away the appropriate
70 * quantization table for each component at the start of the first scan
71 * involving that component.  (This is necessary in order to correctly
72 * decode files that reuse Q-table slots.)
73 * When we are ready to make an output pass, the saved Q-table is converted
74 * to a multiplier table that will actually be used by the IDCT routine.
75 * The multiplier table contents are IDCT-method-dependent.  To support
76 * application changes in IDCT method between scans, we can remake the
77 * multiplier tables if necessary.
78 * In buffered-image mode, the first output pass may occur before any data
79 * has been seen for some components, and thus before their Q-tables have
80 * been saved away.  To handle this case, multiplier tables are preset
81 * to zeroes; the result of the IDCT will be a neutral gray level.
82 */
83
84
85/* Private subobject for this module */
86
87typedef struct {
88  struct jpeg_inverse_dct pub;	/* public fields */
89
90  /* This array contains the IDCT method code that each multiplier table
91   * is currently set up for, or -1 if it's not yet set up.
92   * The actual multiplier tables are pointed to by dct_table in the
93   * per-component comp_info structures.
94   */
95  int cur_method[MAX_COMPONENTS];
96} my_idct_controller;
97
98typedef my_idct_controller * my_idct_ptr;
99
100
101/* Allocated multiplier tables: big enough for any supported variant */
102
103typedef union {
104  ISLOW_MULT_TYPE islow_array[DCTSIZE2];
105#ifdef DCT_IFAST_SUPPORTED
106  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
107#endif
108#ifdef DCT_FLOAT_SUPPORTED
109  FLOAT_MULT_TYPE float_array[DCTSIZE2];
110#endif
111} multiplier_table;
112
113
114/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
115 * so be sure to compile that code if either ISLOW or SCALING is requested.
116 */
/* ISLOW-style table construction is needed by either feature. */
#if defined(DCT_ISLOW_SUPPORTED) || defined(IDCT_SCALING_SUPPORTED)
#define PROVIDE_ISLOW_TABLES
#endif
124
125
126/*
127 * Prepare for an output pass.
128 * Here we select the proper IDCT routine for each component and build
129 * a matching multiplier table.
130 */
131
132METHODDEF(void)
133start_pass (j_decompress_ptr cinfo)
134{
135  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
136  int ci, i;
137  jpeg_component_info *compptr;
138  int method = 0;
139  inverse_DCT_method_ptr method_ptr = NULL;
140  JQUANT_TBL * qtbl;
141
142  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
143       ci++, compptr++) {
144    /* Select the proper IDCT routine for this component's scaling */
145    switch (compptr->DCT_scaled_size) {
146#ifdef IDCT_SCALING_SUPPORTED
147    case 1:
148      method_ptr = jpeg_idct_1x1;
149      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
150      break;
151    case 2:
152#if defined(NV_ARM_NEON) && defined(__ARM_HAVE_NEON)
153      if (cap_neon_idct_2x2()) {
154        method_ptr = jsimd_idct_2x2;
155      } else {
156        method_ptr = jpeg_idct_2x2;
157      }
158#else
159      method_ptr = jpeg_idct_2x2;
160#endif
161      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
162      break;
163    case 4:
164#if defined(NV_ARM_NEON) && defined(__ARM_HAVE_NEON)
165	  if (cap_neon_idct_4x4()) {
166        method_ptr = jsimd_idct_4x4;
167      } else {
168        method_ptr = jpeg_idct_4x4;
169      }
170#else
171      method_ptr = jpeg_idct_4x4;
172#endif
173      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
174      break;
175#endif
176    case DCTSIZE:
177      switch (cinfo->dct_method) {
178#ifdef ANDROID_ARMV6_IDCT
179      case JDCT_ISLOW:
180      case JDCT_IFAST:
181	method_ptr = jpeg_idct_armv6;
182	method = JDCT_IFAST;
183	break;
184#else /* ANDROID_ARMV6_IDCT */
185#ifdef ANDROID_INTELSSE2_IDCT
186      case JDCT_ISLOW:
187      case JDCT_IFAST:
188	method_ptr = jpeg_idct_intelsse;
189	method = JDCT_ISLOW; /* Use quant table of ISLOW.*/
190	break;
191#else /* ANDROID_INTELSSE2_IDCT */
192#ifdef ANDROID_MIPS_IDCT
193      case JDCT_ISLOW:
194      case JDCT_IFAST:
195	method_ptr = jpeg_idct_mips;
196	method = JDCT_IFAST;
197	break;
198#else /* ANDROID_MIPS_IDCT */
199#ifdef DCT_ISLOW_SUPPORTED
200      case JDCT_ISLOW:
201	method_ptr = jpeg_idct_islow;
202	method = JDCT_ISLOW;
203	break;
204#endif
205#ifdef DCT_IFAST_SUPPORTED
206      case JDCT_IFAST:
207#if defined(NV_ARM_NEON) && defined(__ARM_HAVE_NEON)
208        if (cap_neon_idct_ifast()) {
209          method_ptr = jsimd_idct_ifast;
210        } else {
211          method_ptr = jpeg_idct_ifast;
212        }
213#else
214        method_ptr = jpeg_idct_ifast;
215#endif
216	method = JDCT_IFAST;
217	break;
218#endif
219#endif /* ANDROID_MIPS_IDCT */
220#endif /* ANDROID_INTELSSE2_IDCT*/
221#endif /* ANDROID_ARMV6_IDCT */
222#ifdef DCT_FLOAT_SUPPORTED
223      case JDCT_FLOAT:
224	method_ptr = jpeg_idct_float;
225	method = JDCT_FLOAT;
226	break;
227#endif
228      default:
229	ERREXIT(cinfo, JERR_NOT_COMPILED);
230	break;
231      }
232      break;
233    default:
234      ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size);
235      break;
236    }
237    idct->pub.inverse_DCT[ci] = method_ptr;
238    /* Create multiplier table from quant table.
239     * However, we can skip this if the component is uninteresting
240     * or if we already built the table.  Also, if no quant table
241     * has yet been saved for the component, we leave the
242     * multiplier table all-zero; we'll be reading zeroes from the
243     * coefficient controller's buffer anyway.
244     */
245    if (! compptr->component_needed || idct->cur_method[ci] == method)
246      continue;
247    qtbl = compptr->quant_table;
248    if (qtbl == NULL)		/* happens if no data yet for component */
249      continue;
250    idct->cur_method[ci] = method;
251    switch (method) {
252#ifdef PROVIDE_ISLOW_TABLES
253    case JDCT_ISLOW:
254      {
255	/* For LL&M IDCT method, multipliers are equal to raw quantization
256	 * coefficients, but are stored as ints to ensure access efficiency.
257	 */
258	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
259	for (i = 0; i < DCTSIZE2; i++) {
260	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
261	}
262      }
263      break;
264#endif
265#ifdef DCT_IFAST_SUPPORTED
266    case JDCT_IFAST:
267      {
268	/* For AA&N IDCT method, multipliers are equal to quantization
269	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
270	 *   scalefactor[0] = 1
271	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
272	 * For integer operation, the multiplier table is to be scaled by
273	 * IFAST_SCALE_BITS.
274	 */
275	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
276#ifdef ANDROID_ARMV6_IDCT
277	/* Precomputed values scaled up by 15 bits. */
278	static const unsigned short scales[DCTSIZE2] = {
279	  32768, 45451, 42813, 38531, 32768, 25746, 17734,  9041,
280	  45451, 63042, 59384, 53444, 45451, 35710, 24598, 12540,
281	  42813, 59384, 55938, 50343, 42813, 33638, 23170, 11812,
282	  38531, 53444, 50343, 45308, 38531, 30274, 20853, 10631,
283	  32768, 45451, 42813, 38531, 32768, 25746, 17734,  9041,
284	  25746, 35710, 33638, 30274, 25746, 20228, 13933,  7103,
285	  17734, 24598, 23170, 20853, 17734, 13933,  9598,  4893,
286	   9041, 12540, 11812, 10631,  9041,  7103,  4893,  2494,
287	};
288	/* Inverse map of [7, 5, 1, 3, 0, 2, 4, 6]. */
289	static const char orders[DCTSIZE] = {4, 2, 5, 3, 6, 1, 7, 0};
290	/* Reorder the columns after transposing. */
291	for (i = 0; i < DCTSIZE2; ++i) {
292	  int j = ((i & 7) << 3) + orders[i >> 3];
293	  ifmtbl[j] = (qtbl->quantval[i] * scales[i] + 2) >> 2;
294	}
295#else /* ANDROID_ARMV6_IDCT */
296
297#define CONST_BITS 14
298	static const INT16 aanscales[DCTSIZE2] = {
299	  /* precomputed values scaled up by 14 bits */
300	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
301	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
302	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
303	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
304	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
305	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
306	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
307	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
308	};
309	SHIFT_TEMPS
310
311	for (i = 0; i < DCTSIZE2; i++) {
312	  ifmtbl[i] = (IFAST_MULT_TYPE)
313	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
314				  (INT32) aanscales[i]),
315		    CONST_BITS-IFAST_SCALE_BITS);
316	}
317#endif /* ANDROID_ARMV6_IDCT */
318      }
319      break;
320#endif
321#ifdef DCT_FLOAT_SUPPORTED
322    case JDCT_FLOAT:
323      {
324	/* For float AA&N IDCT method, multipliers are equal to quantization
325	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
326	 *   scalefactor[0] = 1
327	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
328	 */
329	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
330	int row, col;
331	static const double aanscalefactor[DCTSIZE] = {
332	  1.0, 1.387039845, 1.306562965, 1.175875602,
333	  1.0, 0.785694958, 0.541196100, 0.275899379
334	};
335
336	i = 0;
337	for (row = 0; row < DCTSIZE; row++) {
338	  for (col = 0; col < DCTSIZE; col++) {
339	    fmtbl[i] = (FLOAT_MULT_TYPE)
340	      ((double) qtbl->quantval[i] *
341	       aanscalefactor[row] * aanscalefactor[col]);
342	    i++;
343	  }
344	}
345      }
346      break;
347#endif
348    default:
349      ERREXIT(cinfo, JERR_NOT_COMPILED);
350      break;
351    }
352  }
353}
354
355
356/*
357 * Initialize IDCT manager.
358 */
359
360GLOBAL(void)
361jinit_inverse_dct (j_decompress_ptr cinfo)
362{
363  my_idct_ptr idct;
364  int ci;
365  jpeg_component_info *compptr;
366
367  idct = (my_idct_ptr)
368    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
369				SIZEOF(my_idct_controller));
370  cinfo->idct = (struct jpeg_inverse_dct *) idct;
371  idct->pub.start_pass = start_pass;
372
373  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
374       ci++, compptr++) {
375    /* Allocate and pre-zero a multiplier table for each component */
376    compptr->dct_table =
377      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
378				  SIZEOF(multiplier_table));
379    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
380    /* Mark multiplier table not yet set up for any method */
381    idct->cur_method[ci] = -1;
382  }
383}
384