1/*
2 * jsimd_x86_64.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander.
6 * Copyright (C) 2015, Matthieu Darbois.
7 *
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11 *
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 64-bit x86 architecture.
15 */
16
17#define JPEG_INTERNALS
18#include "../jinclude.h"
19#include "../jpeglib.h"
20#include "../jsimd.h"
21#include "../jdct.h"
22#include "../jsimddct.h"
23#include "jsimd.h"
24
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment. This macro allows us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address "ptr" is a multiple
 * of 2^order bytes.  Both arguments are parenthesized so that expression
 * arguments (e.g. "base + off" or "a | b") are evaluated as a unit, and the
 * mask is built in size_t so the shift never happens in signed int.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */

/*
 * Bitmask of JSIMD_* capabilities.  The all-ones value is the "not yet
 * initialized" sentinel that init_simd() tests for.
 */
static unsigned int simd_support = ~0U;
/* Nonzero while SIMD Huffman encoding is allowed (see JSIMD_NOHUFFENC). */
static unsigned int simd_huffman = 1;
35
36/*
37 * Check what SIMD accelerations are supported.
38 *
39 * FIXME: This code is racy under a multi-threaded environment.
40 */
41LOCAL(void)
42init_simd (void)
43{
44  char *env = NULL;
45
46  if (simd_support != ~0U)
47    return;
48
49  simd_support = JSIMD_SSE2 | JSIMD_SSE;
50
51  /* Force different settings through environment variables */
52  env = getenv("JSIMD_FORCENONE");
53  if ((env != NULL) && (strcmp(env, "1") == 0))
54    simd_support = 0;
55  env = getenv("JSIMD_NOHUFFENC");
56  if ((env != NULL) && (strcmp(env, "1") == 0))
57    simd_huffman = 0;
58}
59
60GLOBAL(int)
61jsimd_can_rgb_ycc (void)
62{
63  init_simd();
64
65  /* The code is optimised for these values only */
66  if (BITS_IN_JSAMPLE != 8)
67    return 0;
68  if (sizeof(JDIMENSION) != 4)
69    return 0;
70  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
71    return 0;
72
73  if ((simd_support & JSIMD_SSE2) &&
74      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
75    return 1;
76
77  return 0;
78}
79
80GLOBAL(int)
81jsimd_can_rgb_gray (void)
82{
83  init_simd();
84
85  /* The code is optimised for these values only */
86  if (BITS_IN_JSAMPLE != 8)
87    return 0;
88  if (sizeof(JDIMENSION) != 4)
89    return 0;
90  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
91    return 0;
92
93  if ((simd_support & JSIMD_SSE2) &&
94      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
95    return 1;
96
97  return 0;
98}
99
100GLOBAL(int)
101jsimd_can_ycc_rgb (void)
102{
103  init_simd();
104
105  /* The code is optimised for these values only */
106  if (BITS_IN_JSAMPLE != 8)
107    return 0;
108  if (sizeof(JDIMENSION) != 4)
109    return 0;
110  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
111    return 0;
112
113  if ((simd_support & JSIMD_SSE2) &&
114      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
115    return 1;
116
117  return 0;
118}
119
120GLOBAL(int)
121jsimd_can_ycc_rgb565 (void)
122{
123  return 0;
124}
125
126GLOBAL(void)
127jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
128                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
129                       JDIMENSION output_row, int num_rows)
130{
131  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
132
133  switch(cinfo->in_color_space) {
134    case JCS_EXT_RGB:
135      sse2fct=jsimd_extrgb_ycc_convert_sse2;
136      break;
137    case JCS_EXT_RGBX:
138    case JCS_EXT_RGBA:
139      sse2fct=jsimd_extrgbx_ycc_convert_sse2;
140      break;
141    case JCS_EXT_BGR:
142      sse2fct=jsimd_extbgr_ycc_convert_sse2;
143      break;
144    case JCS_EXT_BGRX:
145    case JCS_EXT_BGRA:
146      sse2fct=jsimd_extbgrx_ycc_convert_sse2;
147      break;
148    case JCS_EXT_XBGR:
149    case JCS_EXT_ABGR:
150      sse2fct=jsimd_extxbgr_ycc_convert_sse2;
151      break;
152    case JCS_EXT_XRGB:
153    case JCS_EXT_ARGB:
154      sse2fct=jsimd_extxrgb_ycc_convert_sse2;
155      break;
156    default:
157      sse2fct=jsimd_rgb_ycc_convert_sse2;
158      break;
159  }
160
161  sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
162}
163
164GLOBAL(void)
165jsimd_rgb_gray_convert (j_compress_ptr cinfo,
166                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
167                        JDIMENSION output_row, int num_rows)
168{
169  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
170
171  switch(cinfo->in_color_space) {
172    case JCS_EXT_RGB:
173      sse2fct=jsimd_extrgb_gray_convert_sse2;
174      break;
175    case JCS_EXT_RGBX:
176    case JCS_EXT_RGBA:
177      sse2fct=jsimd_extrgbx_gray_convert_sse2;
178      break;
179    case JCS_EXT_BGR:
180      sse2fct=jsimd_extbgr_gray_convert_sse2;
181      break;
182    case JCS_EXT_BGRX:
183    case JCS_EXT_BGRA:
184      sse2fct=jsimd_extbgrx_gray_convert_sse2;
185      break;
186    case JCS_EXT_XBGR:
187    case JCS_EXT_ABGR:
188      sse2fct=jsimd_extxbgr_gray_convert_sse2;
189      break;
190    case JCS_EXT_XRGB:
191    case JCS_EXT_ARGB:
192      sse2fct=jsimd_extxrgb_gray_convert_sse2;
193      break;
194    default:
195      sse2fct=jsimd_rgb_gray_convert_sse2;
196      break;
197  }
198
199  sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
200}
201
202GLOBAL(void)
203jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
204                       JSAMPIMAGE input_buf, JDIMENSION input_row,
205                       JSAMPARRAY output_buf, int num_rows)
206{
207  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
208
209  switch(cinfo->out_color_space) {
210    case JCS_EXT_RGB:
211      sse2fct=jsimd_ycc_extrgb_convert_sse2;
212      break;
213    case JCS_EXT_RGBX:
214    case JCS_EXT_RGBA:
215      sse2fct=jsimd_ycc_extrgbx_convert_sse2;
216      break;
217    case JCS_EXT_BGR:
218      sse2fct=jsimd_ycc_extbgr_convert_sse2;
219      break;
220    case JCS_EXT_BGRX:
221    case JCS_EXT_BGRA:
222      sse2fct=jsimd_ycc_extbgrx_convert_sse2;
223      break;
224    case JCS_EXT_XBGR:
225    case JCS_EXT_ABGR:
226      sse2fct=jsimd_ycc_extxbgr_convert_sse2;
227      break;
228    case JCS_EXT_XRGB:
229    case JCS_EXT_ARGB:
230      sse2fct=jsimd_ycc_extxrgb_convert_sse2;
231      break;
232    default:
233      sse2fct=jsimd_ycc_rgb_convert_sse2;
234      break;
235  }
236
237  sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
238}
239
240GLOBAL(void)
241jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
242                          JSAMPIMAGE input_buf, JDIMENSION input_row,
243                          JSAMPARRAY output_buf, int num_rows)
244{
245}
246
247GLOBAL(int)
248jsimd_can_h2v2_downsample (void)
249{
250  init_simd();
251
252  /* The code is optimised for these values only */
253  if (BITS_IN_JSAMPLE != 8)
254    return 0;
255  if (sizeof(JDIMENSION) != 4)
256    return 0;
257
258  if (simd_support & JSIMD_SSE2)
259    return 1;
260
261  return 0;
262}
263
264GLOBAL(int)
265jsimd_can_h2v1_downsample (void)
266{
267  init_simd();
268
269  /* The code is optimised for these values only */
270  if (BITS_IN_JSAMPLE != 8)
271    return 0;
272  if (sizeof(JDIMENSION) != 4)
273    return 0;
274
275  if (simd_support & JSIMD_SSE2)
276    return 1;
277
278  return 0;
279}
280
281GLOBAL(void)
282jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
283                       JSAMPARRAY input_data, JSAMPARRAY output_data)
284{
285  jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
286                             compptr->v_samp_factor, compptr->width_in_blocks,
287                             input_data, output_data);
288}
289
290GLOBAL(void)
291jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
292                       JSAMPARRAY input_data, JSAMPARRAY output_data)
293{
294  jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
295                             compptr->v_samp_factor, compptr->width_in_blocks,
296                             input_data, output_data);
297}
298
299GLOBAL(int)
300jsimd_can_h2v2_upsample (void)
301{
302  init_simd();
303
304  /* The code is optimised for these values only */
305  if (BITS_IN_JSAMPLE != 8)
306    return 0;
307  if (sizeof(JDIMENSION) != 4)
308    return 0;
309
310  if (simd_support & JSIMD_SSE2)
311    return 1;
312
313  return 0;
314}
315
316GLOBAL(int)
317jsimd_can_h2v1_upsample (void)
318{
319  init_simd();
320
321  /* The code is optimised for these values only */
322  if (BITS_IN_JSAMPLE != 8)
323    return 0;
324  if (sizeof(JDIMENSION) != 4)
325    return 0;
326
327  if (simd_support & JSIMD_SSE2)
328    return 1;
329
330  return 0;
331}
332
333GLOBAL(void)
334jsimd_h2v2_upsample (j_decompress_ptr cinfo,
335                     jpeg_component_info *compptr,
336                     JSAMPARRAY input_data,
337                     JSAMPARRAY *output_data_ptr)
338{
339  jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
340                           input_data, output_data_ptr);
341}
342
343GLOBAL(void)
344jsimd_h2v1_upsample (j_decompress_ptr cinfo,
345                     jpeg_component_info *compptr,
346                     JSAMPARRAY input_data,
347                     JSAMPARRAY *output_data_ptr)
348{
349  jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
350                           input_data, output_data_ptr);
351}
352
353GLOBAL(int)
354jsimd_can_h2v2_fancy_upsample (void)
355{
356  init_simd();
357
358  /* The code is optimised for these values only */
359  if (BITS_IN_JSAMPLE != 8)
360    return 0;
361  if (sizeof(JDIMENSION) != 4)
362    return 0;
363
364  if ((simd_support & JSIMD_SSE2) &&
365      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
366    return 1;
367
368  return 0;
369}
370
371GLOBAL(int)
372jsimd_can_h2v1_fancy_upsample (void)
373{
374  init_simd();
375
376  /* The code is optimised for these values only */
377  if (BITS_IN_JSAMPLE != 8)
378    return 0;
379  if (sizeof(JDIMENSION) != 4)
380    return 0;
381
382  if ((simd_support & JSIMD_SSE2) &&
383      IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
384    return 1;
385
386  return 0;
387}
388
389GLOBAL(void)
390jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
391                           jpeg_component_info *compptr,
392                           JSAMPARRAY input_data,
393                           JSAMPARRAY *output_data_ptr)
394{
395  jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
396                                 compptr->downsampled_width, input_data,
397                                 output_data_ptr);
398}
399
400GLOBAL(void)
401jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
402                           jpeg_component_info *compptr,
403                           JSAMPARRAY input_data,
404                           JSAMPARRAY *output_data_ptr)
405{
406  jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
407                                 compptr->downsampled_width, input_data,
408                                 output_data_ptr);
409}
410
411GLOBAL(int)
412jsimd_can_h2v2_merged_upsample (void)
413{
414  init_simd();
415
416  /* The code is optimised for these values only */
417  if (BITS_IN_JSAMPLE != 8)
418    return 0;
419  if (sizeof(JDIMENSION) != 4)
420    return 0;
421
422  if ((simd_support & JSIMD_SSE2) &&
423      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
424    return 1;
425
426  return 0;
427}
428
429GLOBAL(int)
430jsimd_can_h2v1_merged_upsample (void)
431{
432  init_simd();
433
434  /* The code is optimised for these values only */
435  if (BITS_IN_JSAMPLE != 8)
436    return 0;
437  if (sizeof(JDIMENSION) != 4)
438    return 0;
439
440  if ((simd_support & JSIMD_SSE2) &&
441      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
442    return 1;
443
444  return 0;
445}
446
447GLOBAL(void)
448jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
449                            JSAMPIMAGE input_buf,
450                            JDIMENSION in_row_group_ctr,
451                            JSAMPARRAY output_buf)
452{
453  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
454
455  switch(cinfo->out_color_space) {
456    case JCS_EXT_RGB:
457      sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
458      break;
459    case JCS_EXT_RGBX:
460    case JCS_EXT_RGBA:
461      sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
462      break;
463    case JCS_EXT_BGR:
464      sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
465      break;
466    case JCS_EXT_BGRX:
467    case JCS_EXT_BGRA:
468      sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
469      break;
470    case JCS_EXT_XBGR:
471    case JCS_EXT_ABGR:
472      sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
473      break;
474    case JCS_EXT_XRGB:
475    case JCS_EXT_ARGB:
476      sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
477      break;
478    default:
479      sse2fct=jsimd_h2v2_merged_upsample_sse2;
480      break;
481  }
482
483  sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
484}
485
486GLOBAL(void)
487jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
488                            JSAMPIMAGE input_buf,
489                            JDIMENSION in_row_group_ctr,
490                            JSAMPARRAY output_buf)
491{
492  void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
493
494  switch(cinfo->out_color_space) {
495    case JCS_EXT_RGB:
496      sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
497      break;
498    case JCS_EXT_RGBX:
499    case JCS_EXT_RGBA:
500      sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
501      break;
502    case JCS_EXT_BGR:
503      sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
504      break;
505    case JCS_EXT_BGRX:
506    case JCS_EXT_BGRA:
507      sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
508      break;
509    case JCS_EXT_XBGR:
510    case JCS_EXT_ABGR:
511      sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
512      break;
513    case JCS_EXT_XRGB:
514    case JCS_EXT_ARGB:
515      sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
516      break;
517    default:
518      sse2fct=jsimd_h2v1_merged_upsample_sse2;
519      break;
520  }
521
522  sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
523}
524
525GLOBAL(int)
526jsimd_can_convsamp (void)
527{
528  init_simd();
529
530  /* The code is optimised for these values only */
531  if (DCTSIZE != 8)
532    return 0;
533  if (BITS_IN_JSAMPLE != 8)
534    return 0;
535  if (sizeof(JDIMENSION) != 4)
536    return 0;
537  if (sizeof(DCTELEM) != 2)
538    return 0;
539
540  if (simd_support & JSIMD_SSE2)
541    return 1;
542
543  return 0;
544}
545
546GLOBAL(int)
547jsimd_can_convsamp_float (void)
548{
549  init_simd();
550
551  /* The code is optimised for these values only */
552  if (DCTSIZE != 8)
553    return 0;
554  if (BITS_IN_JSAMPLE != 8)
555    return 0;
556  if (sizeof(JDIMENSION) != 4)
557    return 0;
558  if (sizeof(FAST_FLOAT) != 4)
559    return 0;
560
561  if (simd_support & JSIMD_SSE2)
562    return 1;
563
564  return 0;
565}
566
567GLOBAL(void)
568jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
569                DCTELEM *workspace)
570{
571  jsimd_convsamp_sse2(sample_data, start_col, workspace);
572}
573
574GLOBAL(void)
575jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
576                      FAST_FLOAT *workspace)
577{
578  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
579}
580
581GLOBAL(int)
582jsimd_can_fdct_islow (void)
583{
584  init_simd();
585
586  /* The code is optimised for these values only */
587  if (DCTSIZE != 8)
588    return 0;
589  if (sizeof(DCTELEM) != 2)
590    return 0;
591
592  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
593    return 1;
594
595  return 0;
596}
597
598GLOBAL(int)
599jsimd_can_fdct_ifast (void)
600{
601  init_simd();
602
603  /* The code is optimised for these values only */
604  if (DCTSIZE != 8)
605    return 0;
606  if (sizeof(DCTELEM) != 2)
607    return 0;
608
609  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
610    return 1;
611
612  return 0;
613}
614
615GLOBAL(int)
616jsimd_can_fdct_float (void)
617{
618  init_simd();
619
620  /* The code is optimised for these values only */
621  if (DCTSIZE != 8)
622    return 0;
623  if (sizeof(FAST_FLOAT) != 4)
624    return 0;
625
626  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
627    return 1;
628
629  return 0;
630}
631
632GLOBAL(void)
633jsimd_fdct_islow (DCTELEM *data)
634{
635  jsimd_fdct_islow_sse2(data);
636}
637
638GLOBAL(void)
639jsimd_fdct_ifast (DCTELEM *data)
640{
641  jsimd_fdct_ifast_sse2(data);
642}
643
644GLOBAL(void)
645jsimd_fdct_float (FAST_FLOAT *data)
646{
647  jsimd_fdct_float_sse(data);
648}
649
650GLOBAL(int)
651jsimd_can_quantize (void)
652{
653  init_simd();
654
655  /* The code is optimised for these values only */
656  if (DCTSIZE != 8)
657    return 0;
658  if (sizeof(JCOEF) != 2)
659    return 0;
660  if (sizeof(DCTELEM) != 2)
661    return 0;
662
663  if (simd_support & JSIMD_SSE2)
664    return 1;
665
666  return 0;
667}
668
669GLOBAL(int)
670jsimd_can_quantize_float (void)
671{
672  init_simd();
673
674  /* The code is optimised for these values only */
675  if (DCTSIZE != 8)
676    return 0;
677  if (sizeof(JCOEF) != 2)
678    return 0;
679  if (sizeof(FAST_FLOAT) != 4)
680    return 0;
681
682  if (simd_support & JSIMD_SSE2)
683    return 1;
684
685  return 0;
686}
687
688GLOBAL(void)
689jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors,
690                DCTELEM *workspace)
691{
692  jsimd_quantize_sse2(coef_block, divisors, workspace);
693}
694
695GLOBAL(void)
696jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors,
697                      FAST_FLOAT *workspace)
698{
699  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
700}
701
702GLOBAL(int)
703jsimd_can_idct_2x2 (void)
704{
705  init_simd();
706
707  /* The code is optimised for these values only */
708  if (DCTSIZE != 8)
709    return 0;
710  if (sizeof(JCOEF) != 2)
711    return 0;
712  if (BITS_IN_JSAMPLE != 8)
713    return 0;
714  if (sizeof(JDIMENSION) != 4)
715    return 0;
716  if (sizeof(ISLOW_MULT_TYPE) != 2)
717    return 0;
718
719  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
720    return 1;
721
722  return 0;
723}
724
725GLOBAL(int)
726jsimd_can_idct_4x4 (void)
727{
728  init_simd();
729
730  /* The code is optimised for these values only */
731  if (DCTSIZE != 8)
732    return 0;
733  if (sizeof(JCOEF) != 2)
734    return 0;
735  if (BITS_IN_JSAMPLE != 8)
736    return 0;
737  if (sizeof(JDIMENSION) != 4)
738    return 0;
739  if (sizeof(ISLOW_MULT_TYPE) != 2)
740    return 0;
741
742  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
743    return 1;
744
745  return 0;
746}
747
748GLOBAL(void)
749jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
750                JCOEFPTR coef_block, JSAMPARRAY output_buf,
751                JDIMENSION output_col)
752{
753  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
754}
755
756GLOBAL(void)
757jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr,
758                JCOEFPTR coef_block, JSAMPARRAY output_buf,
759                JDIMENSION output_col)
760{
761  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
762}
763
764GLOBAL(int)
765jsimd_can_idct_islow (void)
766{
767  init_simd();
768
769  /* The code is optimised for these values only */
770  if (DCTSIZE != 8)
771    return 0;
772  if (sizeof(JCOEF) != 2)
773    return 0;
774  if (BITS_IN_JSAMPLE != 8)
775    return 0;
776  if (sizeof(JDIMENSION) != 4)
777    return 0;
778  if (sizeof(ISLOW_MULT_TYPE) != 2)
779    return 0;
780
781  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
782    return 1;
783
784  return 0;
785}
786
787GLOBAL(int)
788jsimd_can_idct_ifast (void)
789{
790  init_simd();
791
792  /* The code is optimised for these values only */
793  if (DCTSIZE != 8)
794    return 0;
795  if (sizeof(JCOEF) != 2)
796    return 0;
797  if (BITS_IN_JSAMPLE != 8)
798    return 0;
799  if (sizeof(JDIMENSION) != 4)
800    return 0;
801  if (sizeof(IFAST_MULT_TYPE) != 2)
802    return 0;
803  if (IFAST_SCALE_BITS != 2)
804    return 0;
805
806  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
807    return 1;
808
809  return 0;
810}
811
812GLOBAL(int)
813jsimd_can_idct_float (void)
814{
815  init_simd();
816
817  if (DCTSIZE != 8)
818    return 0;
819  if (sizeof(JCOEF) != 2)
820    return 0;
821  if (BITS_IN_JSAMPLE != 8)
822    return 0;
823  if (sizeof(JDIMENSION) != 4)
824    return 0;
825  if (sizeof(FAST_FLOAT) != 4)
826    return 0;
827  if (sizeof(FLOAT_MULT_TYPE) != 4)
828    return 0;
829
830  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
831    return 1;
832
833  return 0;
834}
835
836GLOBAL(void)
837jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr,
838                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
839                  JDIMENSION output_col)
840{
841  jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
842                        output_col);
843}
844
845GLOBAL(void)
846jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
847                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
848                  JDIMENSION output_col)
849{
850  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
851                        output_col);
852}
853
854GLOBAL(void)
855jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr,
856                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
857                  JDIMENSION output_col)
858{
859  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
860                        output_col);
861}
862
863GLOBAL(int)
864jsimd_can_huff_encode_one_block (void)
865{
866  init_simd();
867
868  if (DCTSIZE != 8)
869    return 0;
870  if (sizeof(JCOEF) != 2)
871    return 0;
872
873  if ((simd_support & JSIMD_SSE2) && simd_huffman &&
874      IS_ALIGNED_SSE(jconst_huff_encode_one_block))
875    return 1;
876
877  return 0;
878}
879
880GLOBAL(JOCTET*)
881jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block,
882                             int last_dc_val, c_derived_tbl *dctbl,
883                             c_derived_tbl *actbl)
884{
885  return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
886                                          dctbl, actbl);
887}
888