1/*
2 * jcdctmgr.c
3 *
4 * This file was part of the Independent JPEG Group's software:
5 * Copyright (C) 1994-1996, Thomas G. Lane.
6 * libjpeg-turbo Modifications:
7 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9 * Copyright (C) 2011 D. R. Commander
10 * For conditions of distribution and use, see the accompanying README file.
11 *
12 * This file contains the forward-DCT management logic.
13 * This code selects a particular DCT implementation to be used,
14 * and it performs related housekeeping chores including coefficient
15 * quantization.
16 */
17
18#define JPEG_INTERNALS
19#include "jinclude.h"
20#include "jpeglib.h"
21#include "jdct.h"		/* Private declarations for DCT subsystem */
22#include "jsimddct.h"
23
24
25/* Private subobject for this module */
26
27typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
28typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
29
30typedef JMETHOD(void, convsamp_method_ptr,
31                (JSAMPARRAY sample_data, JDIMENSION start_col,
32                 DCTELEM * workspace));
33typedef JMETHOD(void, float_convsamp_method_ptr,
34                (JSAMPARRAY sample_data, JDIMENSION start_col,
35                 FAST_FLOAT *workspace));
36
37typedef JMETHOD(void, quantize_method_ptr,
38                (JCOEFPTR coef_block, DCTELEM * divisors,
39                 DCTELEM * workspace));
40typedef JMETHOD(void, float_quantize_method_ptr,
41                (JCOEFPTR coef_block, FAST_FLOAT * divisors,
42                 FAST_FLOAT * workspace));
43
44METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
45
46typedef struct {
47  struct jpeg_forward_dct pub;	/* public fields */
48
49  /* Pointer to the DCT routine actually in use */
50  forward_DCT_method_ptr dct;
51  convsamp_method_ptr convsamp;
52  quantize_method_ptr quantize;
53
54  /* The actual post-DCT divisors --- not identical to the quant table
55   * entries, because of scaling (especially for an unnormalized DCT).
56   * Each table is given in normal array order.
57   */
58  DCTELEM * divisors[NUM_QUANT_TBLS];
59
60  /* work area for FDCT subroutine */
61  DCTELEM * workspace;
62
63#ifdef DCT_FLOAT_SUPPORTED
64  /* Same as above for the floating-point case. */
65  float_DCT_method_ptr float_dct;
66  float_convsamp_method_ptr float_convsamp;
67  float_quantize_method_ptr float_quantize;
68  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
69  FAST_FLOAT * float_workspace;
70#endif
71} my_fdct_controller;
72
73typedef my_fdct_controller * my_fdct_ptr;
74
75
76/*
77 * Find the highest bit in an integer through binary search.
78 */
79LOCAL(int)
80flss (UINT16 val)
81{
82  int bit;
83
84  bit = 16;
85
86  if (!val)
87    return 0;
88
89  if (!(val & 0xff00)) {
90    bit -= 8;
91    val <<= 8;
92  }
93  if (!(val & 0xf000)) {
94    bit -= 4;
95    val <<= 4;
96  }
97  if (!(val & 0xc000)) {
98    bit -= 2;
99    val <<= 2;
100  }
101  if (!(val & 0x8000)) {
102    bit -= 1;
103    val <<= 1;
104  }
105
106  return bit;
107}
108
109/*
110 * Compute values to do a division using reciprocal.
111 *
112 * This implementation is based on an algorithm described in
113 *   "How to optimize for the Pentium family of microprocessors"
114 *   (http://www.agner.org/assem/).
115 * More information about the basic algorithm can be found in
116 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
117 *
118 * The basic idea is to replace x/d by x * d^-1. In order to store
119 * d^-1 with enough precision we shift it left a few places. It turns
 * out that this algorithm gives just enough precision, and also fits
121 * into DCTELEM:
122 *
123 *   b = (the number of significant bits in divisor) - 1
124 *   r = (word size) + b
125 *   f = 2^r / divisor
126 *
127 * f will not be an integer for most cases, so we need to compensate
128 * for the rounding error introduced:
129 *
130 *   no fractional part:
131 *
132 *       result = input >> r
133 *
134 *   fractional part of f < 0.5:
135 *
136 *       round f down to nearest integer
137 *       result = ((input + 1) * f) >> r
138 *
139 *   fractional part of f > 0.5:
140 *
141 *       round f up to nearest integer
142 *       result = (input * f) >> r
143 *
144 * This is the original algorithm that gives truncated results. But we
145 * want properly rounded results, so we replace "input" with
146 * "input + divisor/2".
147 *
148 * In order to allow SIMD implementations we also tweak the values to
149 * allow the same calculation to be made at all times:
150 *
151 *   dctbl[0] = f rounded to nearest integer
152 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
153 *   dctbl[2] = 1 << ((word size) * 2 - r)
154 *   dctbl[3] = r - (word size)
155 *
156 * dctbl[2] is for stupid instruction sets where the shift operation
157 * isn't member wise (e.g. MMX).
158 *
159 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
160 * is that most SIMD implementations have a "multiply and store top
161 * half" operation.
162 *
163 * Lastly, we store each of the values in their own table instead
164 * of in a consecutive manner, yet again in order to allow SIMD
165 * routines.
166 */
167LOCAL(int)
168compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
169{
170  UDCTELEM2 fq, fr;
171  UDCTELEM c;
172  int b, r;
173
174  b = flss(divisor) - 1;
175  r  = sizeof(DCTELEM) * 8 + b;
176
177  fq = ((UDCTELEM2)1 << r) / divisor;
178  fr = ((UDCTELEM2)1 << r) % divisor;
179
180  c = divisor / 2; /* for rounding */
181
182  if (fr == 0) { /* divisor is power of two */
183    /* fq will be one bit too large to fit in DCTELEM, so adjust */
184    fq >>= 1;
185    r--;
186  } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
187    c++;
188  } else { /* fractional part is > 0.5 */
189    fq++;
190  }
191
192  dtbl[DCTSIZE2 * 0] = (DCTELEM) fq;      /* reciprocal */
193  dtbl[DCTSIZE2 * 1] = (DCTELEM) c;       /* correction + roundfactor */
194  dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r));  /* scale */
195  dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
196
197  if(r <= 16) return 0;
198  else return 1;
199}
200
201/*
202 * Initialize for a processing pass.
203 * Verify that all referenced Q-tables are present, and set up
204 * the divisor table for each one.
205 * In the current implementation, DCT of all components is done during
206 * the first pass, even if only some components will be output in the
207 * first scan.  Hence all components should be examined here.
208 */
209
210METHODDEF(void)
211start_pass_fdctmgr (j_compress_ptr cinfo)
212{
213  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
214  int ci, qtblno, i;
215  jpeg_component_info *compptr;
216  JQUANT_TBL * qtbl;
217  DCTELEM * dtbl;
218
219  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
220       ci++, compptr++) {
221    qtblno = compptr->quant_tbl_no;
222    /* Make sure specified quantization table is present */
223    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
224	cinfo->quant_tbl_ptrs[qtblno] == NULL)
225      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
226    qtbl = cinfo->quant_tbl_ptrs[qtblno];
227    /* Compute divisors for this quant table */
228    /* We may do this more than once for same table, but it's not a big deal */
229    switch (cinfo->dct_method) {
230#ifdef DCT_ISLOW_SUPPORTED
231    case JDCT_ISLOW:
232      /* For LL&M IDCT method, divisors are equal to raw quantization
233       * coefficients multiplied by 8 (to counteract scaling).
234       */
235      if (fdct->divisors[qtblno] == NULL) {
236	fdct->divisors[qtblno] = (DCTELEM *)
237	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
238				      (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
239      }
240      dtbl = fdct->divisors[qtblno];
241      for (i = 0; i < DCTSIZE2; i++) {
242	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
243	  && fdct->quantize == jsimd_quantize)
244	  fdct->quantize = quantize;
245      }
246      break;
247#endif
248#ifdef DCT_IFAST_SUPPORTED
249    case JDCT_IFAST:
250      {
251	/* For AA&N IDCT method, divisors are equal to quantization
252	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
253	 *   scalefactor[0] = 1
254	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
255	 * We apply a further scale factor of 8.
256	 */
257#define CONST_BITS 14
258	static const INT16 aanscales[DCTSIZE2] = {
259	  /* precomputed values scaled up by 14 bits */
260	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
261	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
262	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
263	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
264	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
265	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
266	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
267	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
268	};
269	SHIFT_TEMPS
270
271	if (fdct->divisors[qtblno] == NULL) {
272	  fdct->divisors[qtblno] = (DCTELEM *)
273	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
274					(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
275	}
276	dtbl = fdct->divisors[qtblno];
277	for (i = 0; i < DCTSIZE2; i++) {
278	  if(!compute_reciprocal(
279	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
280				  (INT32) aanscales[i]),
281		    CONST_BITS-3), &dtbl[i])
282	    && fdct->quantize == jsimd_quantize)
283	    fdct->quantize = quantize;
284	}
285      }
286      break;
287#endif
288#ifdef DCT_FLOAT_SUPPORTED
289    case JDCT_FLOAT:
290      {
291	/* For float AA&N IDCT method, divisors are equal to quantization
292	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
293	 *   scalefactor[0] = 1
294	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
295	 * We apply a further scale factor of 8.
296	 * What's actually stored is 1/divisor so that the inner loop can
297	 * use a multiplication rather than a division.
298	 */
299	FAST_FLOAT * fdtbl;
300	int row, col;
301	static const double aanscalefactor[DCTSIZE] = {
302	  1.0, 1.387039845, 1.306562965, 1.175875602,
303	  1.0, 0.785694958, 0.541196100, 0.275899379
304	};
305
306	if (fdct->float_divisors[qtblno] == NULL) {
307	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
308	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
309					DCTSIZE2 * SIZEOF(FAST_FLOAT));
310	}
311	fdtbl = fdct->float_divisors[qtblno];
312	i = 0;
313	for (row = 0; row < DCTSIZE; row++) {
314	  for (col = 0; col < DCTSIZE; col++) {
315	    fdtbl[i] = (FAST_FLOAT)
316	      (1.0 / (((double) qtbl->quantval[i] *
317		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
318	    i++;
319	  }
320	}
321      }
322      break;
323#endif
324    default:
325      ERREXIT(cinfo, JERR_NOT_COMPILED);
326      break;
327    }
328  }
329}
330
331
332/*
333 * Load data into workspace, applying unsigned->signed conversion.
334 */
335
336METHODDEF(void)
337convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
338{
339  register DCTELEM *workspaceptr;
340  register JSAMPROW elemptr;
341  register int elemr;
342
343  workspaceptr = workspace;
344  for (elemr = 0; elemr < DCTSIZE; elemr++) {
345    elemptr = sample_data[elemr] + start_col;
346
347#if DCTSIZE == 8		/* unroll the inner loop */
348    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
349    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
350    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
351    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
352    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
353    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
354    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
355    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
356#else
357    {
358      register int elemc;
359      for (elemc = DCTSIZE; elemc > 0; elemc--)
360        *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
361    }
362#endif
363  }
364}
365
366
367/*
368 * Quantize/descale the coefficients, and store into coef_blocks[].
369 */
370
371METHODDEF(void)
372quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
373{
374  int i;
375  DCTELEM temp;
376  UDCTELEM recip, corr, shift;
377  UDCTELEM2 product;
378  JCOEFPTR output_ptr = coef_block;
379
380  for (i = 0; i < DCTSIZE2; i++) {
381    temp = workspace[i];
382    recip = divisors[i + DCTSIZE2 * 0];
383    corr =  divisors[i + DCTSIZE2 * 1];
384    shift = divisors[i + DCTSIZE2 * 3];
385
386    if (temp < 0) {
387      temp = -temp;
388      product = (UDCTELEM2)(temp + corr) * recip;
389      product >>= shift + sizeof(DCTELEM)*8;
390      temp = product;
391      temp = -temp;
392    } else {
393      product = (UDCTELEM2)(temp + corr) * recip;
394      product >>= shift + sizeof(DCTELEM)*8;
395      temp = product;
396    }
397
398    output_ptr[i] = (JCOEF) temp;
399  }
400}
401
402
403/*
404 * Perform forward DCT on one or more blocks of a component.
405 *
406 * The input samples are taken from the sample_data[] array starting at
407 * position start_row/start_col, and moving to the right for any additional
408 * blocks. The quantized coefficients are returned in coef_blocks[].
409 */
410
411METHODDEF(void)
412forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
413	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
414	     JDIMENSION start_row, JDIMENSION start_col,
415	     JDIMENSION num_blocks)
416/* This version is used for integer DCT implementations. */
417{
418  /* This routine is heavily used, so it's worth coding it tightly. */
419  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
420  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
421  DCTELEM * workspace;
422  JDIMENSION bi;
423
424  /* Make sure the compiler doesn't look up these every pass */
425  forward_DCT_method_ptr do_dct = fdct->dct;
426  convsamp_method_ptr do_convsamp = fdct->convsamp;
427  quantize_method_ptr do_quantize = fdct->quantize;
428  workspace = fdct->workspace;
429
430  sample_data += start_row;	/* fold in the vertical offset once */
431
432  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
433    /* Load data into workspace, applying unsigned->signed conversion */
434    (*do_convsamp) (sample_data, start_col, workspace);
435
436    /* Perform the DCT */
437    (*do_dct) (workspace);
438
439    /* Quantize/descale the coefficients, and store into coef_blocks[] */
440    (*do_quantize) (coef_blocks[bi], divisors, workspace);
441  }
442}
443
444
445#ifdef DCT_FLOAT_SUPPORTED
446
447
448METHODDEF(void)
449convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
450{
451  register FAST_FLOAT *workspaceptr;
452  register JSAMPROW elemptr;
453  register int elemr;
454
455  workspaceptr = workspace;
456  for (elemr = 0; elemr < DCTSIZE; elemr++) {
457    elemptr = sample_data[elemr] + start_col;
458#if DCTSIZE == 8		/* unroll the inner loop */
459    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
460    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
461    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
462    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
463    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
464    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
465    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
466    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
467#else
468    {
469      register int elemc;
470      for (elemc = DCTSIZE; elemc > 0; elemc--)
471        *workspaceptr++ = (FAST_FLOAT)
472                          (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
473    }
474#endif
475  }
476}
477
478
479METHODDEF(void)
480quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
481{
482  register FAST_FLOAT temp;
483  register int i;
484  register JCOEFPTR output_ptr = coef_block;
485
486  for (i = 0; i < DCTSIZE2; i++) {
487    /* Apply the quantization and scaling factor */
488    temp = workspace[i] * divisors[i];
489
490    /* Round to nearest integer.
491     * Since C does not specify the direction of rounding for negative
492     * quotients, we have to force the dividend positive for portability.
493     * The maximum coefficient size is +-16K (for 12-bit data), so this
494     * code should work for either 16-bit or 32-bit ints.
495     */
496    output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
497  }
498}
499
500
501METHODDEF(void)
502forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
503		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
504		   JDIMENSION start_row, JDIMENSION start_col,
505		   JDIMENSION num_blocks)
506/* This version is used for floating-point DCT implementations. */
507{
508  /* This routine is heavily used, so it's worth coding it tightly. */
509  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
510  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
511  FAST_FLOAT * workspace;
512  JDIMENSION bi;
513
514
515  /* Make sure the compiler doesn't look up these every pass */
516  float_DCT_method_ptr do_dct = fdct->float_dct;
517  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
518  float_quantize_method_ptr do_quantize = fdct->float_quantize;
519  workspace = fdct->float_workspace;
520
521  sample_data += start_row;	/* fold in the vertical offset once */
522
523  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
524    /* Load data into workspace, applying unsigned->signed conversion */
525    (*do_convsamp) (sample_data, start_col, workspace);
526
527    /* Perform the DCT */
528    (*do_dct) (workspace);
529
530    /* Quantize/descale the coefficients, and store into coef_blocks[] */
531    (*do_quantize) (coef_blocks[bi], divisors, workspace);
532  }
533}
534
535#endif /* DCT_FLOAT_SUPPORTED */
536
537
538/*
539 * Initialize FDCT manager.
540 */
541
542GLOBAL(void)
543jinit_forward_dct (j_compress_ptr cinfo)
544{
545  my_fdct_ptr fdct;
546  int i;
547
548  fdct = (my_fdct_ptr)
549    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
550				SIZEOF(my_fdct_controller));
551  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
552  fdct->pub.start_pass = start_pass_fdctmgr;
553
554  /* First determine the DCT... */
555  switch (cinfo->dct_method) {
556#ifdef DCT_ISLOW_SUPPORTED
557  case JDCT_ISLOW:
558    fdct->pub.forward_DCT = forward_DCT;
559    if (jsimd_can_fdct_islow())
560      fdct->dct = jsimd_fdct_islow;
561    else
562      fdct->dct = jpeg_fdct_islow;
563    break;
564#endif
565#ifdef DCT_IFAST_SUPPORTED
566  case JDCT_IFAST:
567    fdct->pub.forward_DCT = forward_DCT;
568    if (jsimd_can_fdct_ifast())
569      fdct->dct = jsimd_fdct_ifast;
570    else
571      fdct->dct = jpeg_fdct_ifast;
572    break;
573#endif
574#ifdef DCT_FLOAT_SUPPORTED
575  case JDCT_FLOAT:
576    fdct->pub.forward_DCT = forward_DCT_float;
577    if (jsimd_can_fdct_float())
578      fdct->float_dct = jsimd_fdct_float;
579    else
580      fdct->float_dct = jpeg_fdct_float;
581    break;
582#endif
583  default:
584    ERREXIT(cinfo, JERR_NOT_COMPILED);
585    break;
586  }
587
588  /* ...then the supporting stages. */
589  switch (cinfo->dct_method) {
590#ifdef DCT_ISLOW_SUPPORTED
591  case JDCT_ISLOW:
592#endif
593#ifdef DCT_IFAST_SUPPORTED
594  case JDCT_IFAST:
595#endif
596#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
597    if (jsimd_can_convsamp())
598      fdct->convsamp = jsimd_convsamp;
599    else
600      fdct->convsamp = convsamp;
601    if (jsimd_can_quantize())
602      fdct->quantize = jsimd_quantize;
603    else
604      fdct->quantize = quantize;
605    break;
606#endif
607#ifdef DCT_FLOAT_SUPPORTED
608  case JDCT_FLOAT:
609    if (jsimd_can_convsamp_float())
610      fdct->float_convsamp = jsimd_convsamp_float;
611    else
612      fdct->float_convsamp = convsamp_float;
613    if (jsimd_can_quantize_float())
614      fdct->float_quantize = jsimd_quantize_float;
615    else
616      fdct->float_quantize = quantize_float;
617    break;
618#endif
619  default:
620    ERREXIT(cinfo, JERR_NOT_COMPILED);
621    break;
622  }
623
624  /* Allocate workspace memory */
625#ifdef DCT_FLOAT_SUPPORTED
626  if (cinfo->dct_method == JDCT_FLOAT)
627    fdct->float_workspace = (FAST_FLOAT *)
628      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
629				  SIZEOF(FAST_FLOAT) * DCTSIZE2);
630  else
631#endif
632    fdct->workspace = (DCTELEM *)
633      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
634				  SIZEOF(DCTELEM) * DCTSIZE2);
635
636  /* Mark divisor tables unallocated */
637  for (i = 0; i < NUM_QUANT_TBLS; i++) {
638    fdct->divisors[i] = NULL;
639#ifdef DCT_FLOAT_SUPPORTED
640    fdct->float_divisors[i] = NULL;
641#endif
642  }
643}
644